lib/antlr3/task.rb

#!/usr/bin/ruby
# encoding: utf-8

require 'antlr3'
require 'set'
require 'rake'
require 'rake/tasklib'
require 'shellwords'

module ANTLR3

=begin rdoc ANTLR3::CompileTask

A utility that generates rake tasks for compiling ANTLR grammar
files. It is a general-purpose tool -- the grammars do not have
to be targeted for Ruby output; it handles all known ANTLR
language targets.

  require 'antlr3/task'

  ANTLR3::CompileTask.define(
    :name => 'grammars', :output_directory => 'lib/parsers'
  ) do | t |
    t.grammar_set( 'antlr/MainParser.g', 'antlr/MainTree.g' )

    t.grammar_set( 'antlr/Template.g' ) do | gram |
      gram.output_directory = 'lib/parsers/template'
      gram.debug = true
    end
  end


TODO: finish documentation

=end

class CompileTask < Rake::TaskLib
  # grammar_sets:: the GrammarSet objects registered so far through #grammar_set
  # options::      the option hash handed to the constructor; it is merged into
  #                the options of every grammar set created by #grammar_set
  attr_reader :grammar_sets, :options
  # name:: the rake namespace under which the 'compile' and 'clobber' tasks
  #        are defined (see #define)
  attr_accessor :name

  # Convenience constructor. Builds a new task library from +grammar_files+
  # (forwarded untouched to +new+), hands it to the optional block for further
  # configuration, defines the rake tasks and finally returns the library.
  def self.define( *grammar_files )
    task_lib = new( *grammar_files )
    yield( task_lib ) if block_given?
    task_lib.define
    task_lib
  end

  # Accepts any number of grammar file paths (nested arrays are flattened),
  # optionally followed by an option hash. The :name option selects the rake
  # namespace (default 'antlr-grammars'). When at least one grammar file is
  # given, they are registered together as a single grammar set.
  def initialize( *grammar_files )
    grammar_files = grammar_files.flatten
    options = grammar_files.last.is_a?( Hash ) ? grammar_files.pop : {}

    @options = options
    @name = options.fetch( :name, 'antlr-grammars' )
    @grammar_sets = []
    # remember the namespace that is active while the library is created
    @namespace = Rake.application.current_scope

    grammar_set( grammar_files ) unless grammar_files.empty?
  end

  # Returns one flat list holding the target files of every registered
  # grammar set, in registration order.
  def target_files
    files = []
    @grammar_sets.each { | set | files.concat( set.target_files ) }
    files
  end

  # Registers a new GrammarSet built from +grammar_files+ (nested arrays are
  # flattened). A trailing hash is treated as per-set options and is merged
  # over the library-wide options. The new set is yielded to the optional
  # block before it is stored, and it is returned to the caller.
  def grammar_set( *grammar_files )
    files = grammar_files.flatten
    overrides = files.last.is_a?( Hash ) ? files.pop : {}

    new_set = GrammarSet.new( files, @options.merge( overrides ) )
    yield( new_set ) if block_given?
    @grammar_sets.push( new_set )
    new_set
  end

  # Returns the rake task that compiles all registered grammars, looked up
  # by its fully-qualified name.
  def compile_task
    Rake::Task[ qualified_task_name( 'compile' ) ]
  end

  # Invokes the compile task.
  def compile!
    compile_task.invoke
  end

  # Returns the rake task that removes all generated files, looked up by
  # its fully-qualified name.
  def clobber_task
    Rake::Task[ qualified_task_name( 'clobber' ) ]
  end

  # Invokes the clobber task.
  def clobber!
    clobber_task.invoke
  end

  # Builds "<enclosing namespaces>:<name>:<leaf>".
  #
  # The namespace captured at construction time is a plain Array of names
  # under old rake releases, but a Rake::Scope linked list (which has no
  # #+ and is ordered innermost-first) under newer ones. Rake::Scope#path
  # already yields the outermost-first, colon-separated form, so it is
  # preferred whenever the scope object offers it.
  def qualified_task_name( leaf )
    scope = @namespace
    outer =
      if scope.respond_to?( :path ) then scope.path.to_s.split( ':' )
      else scope.to_a
      end
    ( outer + [ @name, leaf ] ).join( ':' )
  end
  private :qualified_task_name

  # Defines the rake tasks inside the namespace called +name+:
  #
  # * 'clobber' -- asks every grammar set to clean up its generated files
  # * the tasks each grammar set defines for itself
  # * 'compile' -- depends on every target file of every grammar set
  def define
    namespace( @name ) do
      desc( "trash all ANTLR-generated source code" )
      task( 'clobber' ) do
        @grammar_sets.each { | set | set.clean }
      end

      @grammar_sets.each { | set | set.define_tasks }

      desc( "compile ANTLR grammars" )
      task( 'compile' => target_files )
    end
  end


#class CompileTask::GrammarSet
# A group of grammar files that share one output directory, one ANTLR jar
# and one set of compilation flags. The set knows how to define the rake
# file tasks for its grammars, how to build and run the ANTLR command line,
# and how to remove the generated files again.
class GrammarSet
  # Since rake 0.9 the task DSL ( +file+, +sh+, +rmdir+, ... ) lives in
  # Rake::DSL and is no longer mixed into Object, so it has to be included
  # explicitly. Older rake releases do not define the module at all.
  include Rake::DSL if defined?( Rake::DSL )

  attr_accessor :antlr_jar, :debug,
                :trace, :profile, :compile_options,
                :java_options
  attr_reader :load_path, :grammars
  attr_writer :output_directory

  # grammar_files:: paths of the grammar files belonging to this set
  # options::       recognized keys are :output_directory, :load_path,
  #                 :antlr_jar, :debug, :trace, :profile, :compile_options
  #                 and :java_options (the last two may be an Array of
  #                 tokens or a shell-style String)
  def initialize( grammar_files, options = {} )
    # the output directory must be known before the load path is built,
    # otherwise a nil entry ends up in the search path
    dir = options[ :output_directory ]
    @output_directory = dir ? dir.to_s : '.'

    @load_path = grammar_files.map { | f | File.dirname( f ) }
    @load_path.push( '.', @output_directory )
    if extra_load = options[ :load_path ]
      # splat, so the extra directories become individual entries
      @load_path.unshift( *[ extra_load ].flatten )
    end
    @load_path.uniq!

    # block form: only ask ANTLR3 for its jar when none was supplied
    @antlr_jar = options.fetch( :antlr_jar ) { ANTLR3.antlr_jar }
    @debug = options.fetch( :debug, false )
    @trace = options.fetch( :trace, false )
    @profile = options.fetch( :profile, false )
    @compile_options = shell_tokens( options[ :compile_options ] )
    @java_options = shell_tokens( options[ :java_options ] )

    # grammar files resolve their imports through #locate while they are
    # constructed, so they are created once the set is fully configured
    @grammars = grammar_files.map { | file | GrammarFile.new( self, file ) }
  end

  # Flat list of every file generated for the grammars of this set.
  def target_files
    @grammars.map { | gram | gram.target_files }.flatten
  end

  # Directory that receives the generated files ('.' when unset).
  def output_directory
    @output_directory || '.'
  end

  # Defines a file task for the ANTLR jar and lets every grammar define its
  # own tasks. A grammar that names a tokenVocab additionally depends on
  # the matching .tokens file when that file can be located.
  def define_tasks
    file( @antlr_jar )

    @grammars.each do | grammar |
      deps = [ @antlr_jar ]
      vocab = grammar.token_vocab
      tokens = vocab && find_tokens_file( vocab, grammar )
      if tokens
        file( tokens )
        deps << tokens
      end
      grammar.define_tasks( deps )
    end
  end

  # Removes the generated files of every grammar, then removes the output
  # directory itself if that left it empty.
  def clean
    @grammars.each { | grammar | grammar.clean }
    dir = output_directory
    if test( ?d, dir ) and ( Dir.entries( dir ) - %w( . .. ) ).empty?
      rmdir( dir )
    end
  end

  # Resolves the .tokens file for +vocab+: first among the grammars of this
  # set, then along the load path. Warns and returns nil when neither finds
  # it, so the dependency is simply skipped.
  def find_tokens_file( vocab, grammar )
    provider = @grammars.find { | candidate | candidate.name == vocab }
    provider and return( provider.tokens_file )
    found = locate( "#{ vocab }.tokens" ) and return( found )
    warn( Util.tidy( <<-END, true ) )
    | unable to locate .tokens file `#{ vocab }' referenced in #{ grammar.path }
    | -- ignoring dependency
    END
    return( nil )
  end

  # Returns the path of the first existing file called +file_name+ along
  # the load path, or nil. The current output directory is searched as
  # well, so a directory assigned later through #output_directory= is
  # honoured.
  def locate( file_name )
    search_path = @load_path | [ output_directory ]
    search_path.each do | dir |
      candidate = File.join( dir, file_name )
      return( candidate ) if File.file?( candidate )
    end
    nil
  end

  # Creates the output directory if necessary and runs ANTLR on +grammar+.
  def compile( grammar )
    dir = output_directory
    test( ?d, dir ) or FileUtils.mkpath( dir )
    sh( build_command( grammar ) )
  end

  # Assembles the shell command line that compiles +grammar+; every token
  # is shell-escaped before the tokens are joined with spaces.
  def build_command( grammar )
    parts = [ 'java', '-cp', @antlr_jar ]
    # the accessors allow a plain String to be assigned later on, so the
    # option lists are normalized here as well
    parts.concat( shell_tokens( @java_options ) )
    parts << 'org.antlr.Tool' << '-fo' << output_directory
    parts << '-debug' if @debug
    parts << '-profile' if @profile
    parts << '-trace' if @trace
    parts.concat( shell_tokens( @compile_options ) )
    parts << grammar.path
    parts.map { | token | escape( token ) }.join( ' ' )
  end

  # Escapes +token+ (converted with to_s) for use as a single shell word;
  # an empty token becomes ''.
  def escape( token )
    Shellwords.escape( token.to_s )
  end

  private

  # Arrays are taken as ready-made token lists; anything else is converted
  # to a String and split using shell quoting rules.
  def shell_tokens( value )
    Array === value ? value : Shellwords.shellwords( value.to_s )
  end

end

# Describes a single ANTLR grammar file: its name, grammar type, target
# language, token vocabulary and imported grammars, all extracted from the
# grammar source. From these it derives the names of the generated files
# and defines the rake file tasks that produce them.
class GrammarFile
  # Since rake 0.9 the task DSL ( +file+, +rm+, +touch+, ... ) lives in
  # Rake::DSL and is no longer mixed into Object, so it has to be included
  # explicitly. Older rake releases do not define the module at all.
  include Rake::DSL if defined?( Rake::DSL )

  # file extensions generated for each ANTLR target language
  LANGUAGES = {
    "ActionScript" => [ ".as" ],
    "CSharp2" => [ ".cs" ],
    "C" => [ ".c", ".h" ],
    "ObjC" => [ ".m", ".h" ],
    "CSharp3" => [ ".cs" ],
    "Cpp" => [ ".cpp", ".h" ],
    "Ruby" => [ ".rb" ],
    "Java" => [ ".java" ],
    "JavaScript" => [ ".js" ],
    "Python" => [ ".py" ],
    "Delphi" => [ ".pas" ],
    "Perl5" => [ ".pm" ]
  }.freeze
  GRAMMAR_TYPES = %w( lexer parser tree combined ).freeze

  ##################################################################
  ######## ATTRIBUTES ##############################################
  ##################################################################

  attr_reader :type, :name, :language, :source,
              :token_vocab, :imports, :imported_grammars,
              :path, :group

  ##################################################################
  ######## CONSTRUCTOR #############################################
  ##################################################################

  # group::   the object that owns this grammar; it supplies the output
  #           directory, load path and ANTLR jar, locates imported
  #           grammars and performs the compilation
  # path::    path of the grammar file, which is read immediately
  # options:: :extra_dependencies -- one or more additional prerequisites
  #           for the generated files
  #
  # The new object is yielded to the optional block after the source has
  # been analysed and before the imported grammars are resolved.
  def initialize( group, path, options = {} )
    @group = group
    @path = path.to_s
    @imports = []
    @language = 'Java'
    @token_vocab = nil
    @tasks_defined = false
    @extra_dependencies = []
    if extra = options[ :extra_dependencies ]
      @extra_dependencies.concat( [ extra ].flatten )
    end

    study
    yield( self ) if block_given?
    fetch_imports
  end

  ##################################################################
  ######## ATTRIBUTE-ISH METHODS ###################################
  ##################################################################

  # The following three values are owned by the group.
  def output_directory
    @group.output_directory
  end

  def load_path
    @group.load_path
  end

  def antlr_jar
    @group.antlr_jar
  end

  # lexer?, parser?, tree? and combined? -- grammar type predicates
  GRAMMAR_TYPES.each do | kind |
    define_method( :"#{ kind }?" ) { @type == kind }
  end

  # Files generated for the lexer (empty for parser and tree grammars).
  def lexer_files
    return( file_names( @name ) ) if lexer?
    return( file_names( @name + 'Lexer' ) ) if combined?
    []
  end

  # Files generated for the parser (empty for lexer and tree grammars).
  def parser_files
    return( file_names( @name ) ) if parser?
    return( file_names( @name + 'Parser' ) ) if combined?
    []
  end

  # Files generated for a tree grammar (empty for every other type).
  def tree_parser_files
    tree? ? file_names( @name ) : []
  end

  # Paths in the output directory named +base+ plus each file extension
  # of the grammar's target language.
  def file_names( base )
    LANGUAGES.fetch( @language ).map do | ext |
      File.join( output_directory, base + ext )
    end
  end

  # Files generated for a delegate (imported) grammar of this grammar.
  def delegate_files( delegate_suffix )
    file_names( "#{ name }_#{ delegate_suffix }" )
  end

  # Path of the .tokens file ANTLR writes for this grammar.
  def tokens_file
    File.join( output_directory, name + '.tokens' )
  end

  # Every file generated for this grammar; the files of the imported
  # grammars are included unless +all+ is false.
  def target_files( all = true )
    targets = [ tokens_file ]
    targets.concat( lexer_files )
    targets.concat( parser_files )
    targets.concat( tree_parser_files )

    if all
      @imported_grammars.each do | grammar |
        targets.concat( grammar.target_files )
      end
    end

    targets
  end

  # Touches the grammar file.
  def update
    touch( @path )
  end

  # Paths of all directly and indirectly imported grammar files.
  def all_imported_files
    imported_files = []
    @imported_grammars.each do | grammar |
      imported_files.push( grammar.path, *grammar.all_imported_files )
    end
    imported_files
  end

  # Deletes the generated files that exist, here and in the imported
  # grammars, and returns the list of deleted paths.
  def clean
    deleted = []
    target_files.each do | target |
      next unless test( ?f, target )
      rm( target )
      deleted << target
    end

    @imported_grammars.each do | grammar |
      deleted.concat( grammar.clean )
    end

    deleted
  end

  # Defines, once, a file task for every generated file. Each target
  # depends on +shared_depends+, the grammar file, all imported grammar
  # files and the extra dependencies given to the constructor.
  def define_tasks( shared_depends )
    return if @tasks_defined

    own_sources = [ @path, *all_imported_files ]
    own_sources.each { | source_file | file( source_file ) }
    depends = shared_depends + own_sources + Array( @extra_dependencies )

    target_files.each do | target |
      # removing the target itself prevents recursive .tokens file dependencies
      file( target => ( depends - [ target ] ) ) do
        @group.compile( self )
      end
    end

    @tasks_defined = true
  end

private

  # Resolves every imported grammar name to a file through the group and
  # wraps it in an Imported object; raises when a file cannot be found.
  def fetch_imports
    @imported_grammars = @imports.map do | imp |
      file = group.locate( "#{ imp }.g" ) or raise( Util.tidy( <<-END ) )
      | #{ @path }: unable to locate imported grammar file #{ imp }.g
      | search directories ( @load_path ):
      |   - #{ load_path.join( "\n  - " ) }
      END
      Imported.new( self, file )
    end
  end

  # Reads the grammar file and extracts the grammar name and type, the
  # +language+ and +tokenVocab+ options and the names of imported grammars.
  # Raises FormatError when no grammar declaration is found or the target
  # language is unknown.
  def study
    @source = File.read( @path )
    declaration = /^\s*(lexer|parser|tree)?\s*grammar\s*(\S+)\s*;/.match( @source ) or
      raise FormatError[ @source, @path ]
    @name = declaration[ 2 ]
    @type = declaration[ 1 ] || 'combined'

    if @source =~ /^\s*options\s*\{(.*?)\}/m
      option_block = $1
      if option_block =~ /\s*language\s*=\s*(\S+)\s*;/
        @language = $1
        LANGUAGES.has_key?( @language ) or
          raise( FormatError, "Unknown ANTLR target language: %p" % @language )
      end
      option_block =~ /\s*tokenVocab\s*=\s*(\S+)\s*;/ and
        @token_vocab = $1
    end

    @source.scan( /^\s*import\s+(\w+\s*(?:,\s*\w+\s*)*);/ ) do | captures |
      @imports.concat( captures.first.strip.split( /\s*,\s*/ ) )
    end
  end
end # class GrammarFile

# A grammar pulled in by another grammar through an +import+ statement.
# It is analysed like any other grammar file, but everything that relates
# to the build environment is answered by the grammar that imports it.
class GrammarFile::Imported < GrammarFile
  # owner:: the grammar that imports this one
  # path::  path of the imported grammar file, which is read immediately
  def initialize( owner, path )
    @owner = owner
    @path = path.to_s
    @token_vocab = nil
    @language = 'Java'
    @imports = []
    study
    fetch_imports
  end

  # Environment queries are forwarded to the importing grammar.

  def load_path
    @owner.load_path
  end

  def output_directory
    @owner.output_directory
  end

  def antlr_jar
    @owner.antlr_jar
  end

  def verbose
    @owner.verbose
  end

  def group
    @owner.group
  end

  # Delegate files are named through the owner, with this grammar's name
  # placed in front of +suffix+.
  def delegate_files( suffix )
    nested_suffix = "#{ @name }_#{ suffix }"
    @owner.delegate_files( nested_suffix )
  end

  # The .tokens file of this grammar followed by the delegate files the
  # owner generates for it.
  def target_files
    [ tokens_file ] + @owner.delegate_files( @name )
  end
end

# Raised when grammar source does not contain a recognizable grammar
# declaration. The message quotes the offending source between two rules
# and names the pattern the declaration has to match.
class GrammarFile::FormatError < StandardError
  attr_reader :file, :source

  # The expected declaration, as shown in the message. It is single-quoted
  # on purpose: inside a double-quoted literal "\s" is a space and "\S" a
  # plain "S", which would garble the pattern.
  DECLARATION_PATTERN = '/^\s*(lexer|parser|tree)?\s*grammar\s*(\S+)\s*;/'

  # Shorthand for +new+, so the error can be raised as
  # <tt>raise FormatError[ source, file ]</tt>.
  def self.[]( *args )
    new( *args )
  end

  # source:: the grammar text that could not be understood
  # file::   path the text was read from, or nil for inline source
  def initialize( source, file = nil )
    @file = file
    @source = source
    rule = ( "-" * 80 ) << "\n"

    message = ''.dup
    if file.nil? # inline
      message << "bad inline grammar source:\n"
    else
      # double quotes, so that \n really ends the line
      message << ( "bad grammar source in file %p\n" % [ @file ] )
    end
    message << rule
    message << @source.to_s
    message << "\n" unless message.end_with?( "\n" )
    message << rule
    message << "could not locate a grammar name and type declaration matching\n"
    message << DECLARATION_PATTERN
    super( message )
  end
end # error GrammarFile::FormatError
end # class CompileTask
end # module ANTLR3