Home | History | Annotate | Download | only in Runtime
      1 package ANTLR::Runtime::BaseRecognizer;
      2 
      3 use Readonly;
      4 use Carp;
      5 
      6 use ANTLR::Runtime::RecognizerSharedState;
      7 use ANTLR::Runtime::Token;
      8 use ANTLR::Runtime::UnwantedTokenException;
      9 use ANTLR::Runtime::MissingTokenException;
     10 use ANTLR::Runtime::MismatchedTokenException;
     11 
     12 use Moose;
     13 
     14 Readonly my $MEMO_RULE_FAILED => -2;
     15 sub MEMO_RULE_FAILED { $MEMO_RULE_FAILED }
     16 
     17 Readonly my $MEMO_RULE_UNKNOWN => -1;
     18 sub MEMO_RULE_UNKNOWN { $MEMO_RULE_UNKNOWN }
     19 
     20 Readonly my $INITIAL_FOLLOW_STACK_SIZE => 100;
     21 sub INITIAL_FOLLOW_STACK_SIZE { $INITIAL_FOLLOW_STACK_SIZE }
     22 
     23 # copies from Token object for convenience in actions
     24 Readonly my $DEFAULT_TOKEN_CHANNEL => ANTLR::Runtime::Token->DEFAULT_CHANNEL;
     25 sub DEFAULT_TOKEN_CHANNEL { $DEFAULT_TOKEN_CHANNEL }
     26 
     27 Readonly my $HIDDEN => ANTLR::Runtime::Token->HIDDEN_CHANNEL;
     28 sub HIDDEN { $HIDDEN }
     29 
     30 Readonly my $NEXT_TOKEN_RULE_NAME => 'next_token';
     31 sub NEXT_TOKEN_RULE_NAME { $NEXT_TOKEN_RULE_NAME }
     32 
     33 # State of a lexer, parser, or tree parser are collected into a state
     34 # object so the state can be shared.  This sharing is needed to
     35 # have one grammar import others and share same error variables
     36 # and other state variables.  It's a kind of explicit multiple
     37 # inheritance via delegation of methods and shared state.
     38 has 'state' => (
     39     is  => 'rw',
     40     isa => 'ANTLR::Runtime::RecognizerSharedState',
     41     default => sub { ANTLR::Runtime::RecognizerSharedState->new() },
     42 );
     43 
     44 sub reset {
     45     my ($self) = @_;
     46 
     47     if (!defined $self->state) {
     48         return;
     49     }
     50 
     51     my $state = $self->state;
     52     $state->_fsp(-1);
     53     $state->error_recovery(0);
     54     $state->last_error_index(-1);
     55     $state->failed(0);
     56     $state->syntax_errors(0);
     57 
     58     # wack everything related to backtracking and memoization
     59     $state->backtracking(0);
     60     # wipe cache
     61     $state->rule_memo([]);
     62 }
     63 
     64 sub match {
     65     Readonly my $usage => 'void match(IntStream input, int ttype, BitSet follow)';
     66     croak $usage if @_ != 4;
     67     my ($self, $input, $ttype, $follow) = @_;
     68 
     69     my $matched_symbol = $self->get_current_input_symbol($input);
     70     if ($input->LA(1) eq $ttype) {
     71         $input->consume();
     72         $self->state->error_recovery(0);
     73         $self->state->failed(0);
     74         return $matched_symbol;
     75     }
     76 
     77     if ($self->state->backtracking > 0) {
     78         $self->state->failed(1);
     79         return $matched_symbol;
     80     }
     81 
     82     return $self->recover_from_mismatched_token($input, $ttype, $follow);
     83 }
     84 
     85 sub match_any {
     86     Readonly my $usage => 'void match_any(IntStream input)';
     87     croak $usage if @_ != 2;
     88     my ($self, $input) = @_;
     89 
     90     $self->state->error_recovery(0);
     91     $self->state->failed(0);
     92     $input->consume();
     93 }
     94 
     95 sub mismatch_is_unwanted_token {
     96     my ($self, $input, $ttype) = @_;
     97     return $input->LA(2) == $ttype;
     98 }
     99 
    100 sub mismatch_is_missing_token {
    101     my ($self, $input, $follow) = @_;
    102 
    103     if (!defined $follow) {
    104         return 0;
    105     }
    106 
    107     if ($follow->member(ANTLR::Runtime::Token->EOR_TOKEN_TYPE)) {
    108         my $viable_tokens_following_this_rule = $self->compute_context_sensitive_rule_FOLLOW();
    109         $follow = $follow->or($viable_tokens_following_this_rule);
    110         if ($self->state->_fsp >= 0) {
    111             $follow->remove(ANTLR::Runtime::Token->EOR_TOKEN_TYPE);
    112         }
    113     }
    114 
    115     if ($follow->member($input->LA(1)) || $follow->member(ANTLR::Runtime::Token->EOR_TOKEN_TYPE)) {
    116         return 1;
    117     }
    118     return 0;
    119 }
    120 
    121 sub mismatch {
    122     Readonly my $usage => 'void mismatch(IntStream input, int ttype, BitSet follow)';
    123     croak $usage if @_ != 4;
    124     my ($self, $input, $ttype, $follow) = @_;
    125 
    126     if ($self->mismatch_is_unwanted_token($input, $ttype)) {
    127         ANTLR::Runtime::UnwantedTokenException->new({
    128             expecting => $ttype,
    129             input => $input
    130         })->throw();
    131     }
    132     elsif ($self->mismatch_is_missing_token($input, $follow)) {
    133         ANTLR::Runtime::MissingTokenException->new({
    134             expecting => $ttype,
    135             input => $input
    136         })->throw();
    137     }
    138     else {
    139         ANTLR::Runtime::MismatchedTokenException->new({
    140             expecting => $ttype,
    141             input => $input
    142         })->throw();
    143     }
    144 }
    145 
    146 sub report_error {
    147     Readonly my $usage => 'void report_error(RecognitionException e)';
    148     croak $usage if @_ != 2;
    149     my ($self, $e) = @_;
    150 
    151     if ($self->state->error_recovery) {
    152         return;
    153     }
    154     $self->state->syntax_errors($self->state->syntax_errors + 1);
    155     $self->state->error_recovery(1);
    156 
    157     $self->display_recognition_error($self->get_token_names(), $e);
    158     return;
    159 }
    160 
    161 sub display_recognition_error {
    162     Readonly my $usage => 'void display_recognition_error(String[] token_names, RecognitionException e)';
    163     croak $usage if @_ != 3;
    164     my ($self, $token_names, $e) = @_;
    165 
    166     my $hdr = $self->get_error_header($e);
    167     my $msg = $self->get_error_message($e, $token_names);
    168     $self->emit_error_message("$hdr $msg");
    169 }
    170 
    171 sub get_error_message {
    172     Readonly my $usage => 'String get_error_message(RecognitionException e, String[] token_names)';
    173     croak $usage if @_ != 3;
    174     my ($self, $e, $token_names) = @_;
    175 
    176     if ($e->isa('ANTLR::Runtime::MismatchedTokenException')) {
    177         my $token_name;
    178         if ($e->get_expecting == ANTLR::Runtime::Token->EOF) {
    179             $token_name = 'EOF';
    180         } else {
    181             $token_name = $token_names->[$e->get_expecting];
    182         }
    183 
    184         return 'mismatched input ' . $self->get_token_error_display($e->get_token)
    185             . ' expecting ' . $token_name;
    186     } elsif ($e->isa('ANTLR::Runtime::MismatchedTreeNodeException')) {
    187         my $token_name;
    188         if ($e->get_expecting == ANTLR::Runtime::Token->EOF) {
    189             $token_name = 'EOF';
    190         } else {
    191             $token_name = $token_names->[$e->get_expecting];
    192         }
    193 
    194         return 'mismatched tree node: ' . $e->node
    195             . ' expecting ' . $token_name;
    196     } elsif ($e->isa('ANTLR::Runtime::NoViableAltException')) {
    197         return 'no viable alternative at input ' . $self->get_token_error_display($e->get_token);
    198     } elsif ($e->isa('ANTLR::Runtime::EarlyExitException')) {
    199         return 'required (...)+ loop did not match anything at input '
    200             . get_token_error_display($e->get_token);
    201     } elsif ($e->isa('ANTLR::Runtime::MismatchedSetException')) {
    202         return 'mismatched input ' . $self->get_token_error_display($e->get_token)
    203             . ' expecting set ' . $e->get_expecting;
    204     } elsif ($e->isa('ANTLR::Runtime::MismatchedNotSetException')) {
    205         return 'mismatched input ' . $self->get_token_error_display($e->get_token)
    206             . ' expecting set ' . $e->get_expecting;
    207     } elsif ($e->isa('ANTLR::Runtime::FailedPredicateException')) {
    208         return 'rule ' . $e->rule_name . ' failed predicate: {'
    209             . $e->predicate_text . '}?';
    210     } else {
    211         return undef;
    212     }
    213 }
    214 
    215 sub get_number_of_syntax_errors {
    216     my ($self) = @_;
    217     return $self->state->syntax_errors;
    218 }
    219 
    220 sub get_error_header {
    221     Readonly my $usage => 'String get_error_header(RecognitionException e)';
    222     croak $usage if @_ != 2;
    223     my ($self, $e) = @_;
    224 
    225     my $line = $e->get_line();
    226     my $col = $e->get_char_position_in_line();
    227 
    228     return "line $line:$col";
    229 }
    230 
    231 sub get_token_error_display {
    232     Readonly my $usage => 'String get_token_error_display(Token t)';
    233     croak $usage if @_ != 2;
    234     my ($self, $t) = @_;
    235 
    236     my $s = $t->get_text();
    237     if (!defined $s) {
    238         if ($t->get_type() == ANTLR::Runtime::Token->EOF) {
    239             $s = '<EOF>';
    240         } else {
    241             my $ttype = $t->get_type();
    242             $s = "<$ttype>";
    243         }
    244     }
    245 
    246     $s =~ s/\n/\\\\n/g;
    247     $s =~ s/\r/\\\\r/g;
    248     $s =~ s/\t/\\\\t/g;
    249 
    250     return "'$s'";
    251 }
    252 
    253 sub emit_error_message {
    254     Readonly my $usage => 'void emit_error_message(String msg)';
    255     croak $usage if @_ != 2;
    256     my ($self, $msg) = @_;
    257 
    258     print STDERR $msg, "\n";
    259 }
    260 
    261 sub recover {
    262     Readonly my $usage => 'void recover(IntStream input, RecognitionException re)';
    263     croak $usage if @_ != 3;
    264     my ($self, $input, $re) = @_;
    265 
    266     if ($self->state->last_error_index == $input->index()) {
    267 	# uh oh, another error at same token index; must be a case
    268 	# where LT(1) is in the recovery token set so nothing is
    269 	# consumed; consume a single token so at least to prevent
    270 	# an infinite loop; this is a failsafe.
    271         $input->consume();
    272     }
    273 
    274     $self->state->last_error_index($input->index());
    275     my $follow_set = $self->compute_error_recovery_set();
    276     $self->begin_resync();
    277     $self->consume_until($input, $follow_set);
    278     $self->end_resync();
    279 }
    280 
    281 sub begin_resync {
    282 }
    283 
    284 sub end_resync {
    285 }
    286 
    287 sub compute_error_recovery_set {
    288     Readonly my $usage => 'void compute_error_recovery_set()';
    289     croak $usage if @_ != 1;
    290     my ($self) = @_;
    291 
    292     $self->combine_follows(0);
    293 }
    294 
    295 sub compute_context_sensitive_rule_FOLLOW {
    296     Readonly my $usage => 'void compute_context_sensitive_rule_FOLLOW()';
    297     croak $usage if @_ != 1;
    298     my ($self) = @_;
    299 
    300     $self->combine_follows(1);
    301 }
    302 
    303 sub combine_follows {
    304     Readonly my $usage => 'BitSet combine_follows(boolean exact)';
    305     croak $usage if @_ != 2;
    306     my ($self, $exact) = @_;
    307 
    308     my $top = $self->state->_fsp;
    309     my $follow_set = ANTLR::Runtime::BitSet->new();
    310 
    311     foreach my $local_follow_set (reverse @{$self->state->following}) {
    312         $follow_set |= $local_follow_set;
    313         if ($exact && $local_follow_set->member(ANTLR::Runtime::Token->EOR_TOKEN_TYPE)) {
    314             last;
    315         }
    316     }
    317     #$follow_set->remove(ANTLR::Runtime::Token->EOR_TOKEN_TYPE);
    318     return $follow_set;
    319 }
    320 
    321 sub recover_from_mismatched_token {
    322     Readonly my $usage => 'void recover_from_mismatched_token(IntStream input, int ttype, BitSet follow)';
    323     croak $usage if @_ != 4;
    324     my ($self, $input, $ttype, $follow) = @_;
    325 
    326     if ($self->mismatch_is_unwanted_token($input, $ttype)) {
    327         my $ex = ANTLR::Runtime::UnwantedTokenException->new({
    328             expecting => $ttype,
    329             input => $input
    330         });
    331 
    332         $self->begin_resync();
    333         $input->consume();
    334         $self->end_resync();
    335         $self->report_error($ex);
    336 
    337         my $matched_symbol = $self->get_current_input_symbol($input);
    338         $input->consume();
    339         return $matched_symbol;
    340     }
    341 
    342     if ($self->mismatch_is_missing_token($input, $follow)) {
    343         my $inserted = $self->get_missing_symbol({
    344                 input => $input,
    345                 expected_token_type => $ttype,
    346                 follow => $follow,
    347         });
    348         my $ex = ANTLR::Runtime::MissingTokenException({
    349             expecting => $ttype,
    350             input => $input,
    351             inserted => $inserted,
    352         });
    353         $self->report_error($ex);
    354         return $inserted;
    355     }
    356 
    357     ANTLR::Runtime::MismatchedTokenException->new({
    358         expecting => $ttype,
    359         input => $input,
    360     })->throw();
    361 }
    362 
    363 sub recover_from_mismatched_set {
    364     Readonly my $usage => 'void recover_from_mismatched_set(IntStream input, RecognitionException e, BitSet follow)';
    365     croak $usage if @_ != 4;
    366     my ($self, $input, $e, $follow) = @_;
    367 
    368     if ($self->mismatch_is_missing_token($input, $follow)) {
    369         $self->report_error($e);
    370         return $self->get_missing_symbol({
    371                 input => $input,
    372                 exception => $e,
    373                 expected_token_type => ANTLR::Runtime::Token->INVALID_TOKEN_TYPE,
    374                 follow => $follow,
    375             });
    376     }
    377 
    378     $e->throw();
    379 }
    380 
    381 sub recover_from_mismatched_element {
    382     Readonly my $usage => 'boolean recover_from_mismatched_element(IntStream input, RecognitionException e, BitSet follow)';
    383     croak $usage if @_ != 4;
    384     my ($self, $input, $e, $follow) = @_;
    385 
    386     return 0 if (!defined $follow);
    387 
    388     if ($follow->member(ANTLR::Runtime::Token->EOR_TOKEN_TYPE)) {
    389         my $viable_tokens_following_this_rule = $self->compute_context_sensitive_rule_FOLLOW();
    390         $follow |= $viable_tokens_following_this_rule;
    391         $follow->remove(ANTLR::Runtime::Token->EOR_TOKEN_TYPE);
    392     }
    393 
    394     if ($follow->member($input->LA(1))) {
    395         $self->report_error($e);
    396         return 1;
    397     }
    398 
    399     return 0;
    400 }
    401 
    402 sub get_current_input_symbol {
    403     my ($self, $input) = @_;
    404     return undef;
    405 }
    406 
    407 sub get_missing_symbol {
    408     my ($self, $arg_ref) = @_;
    409     my $input = $arg_ref->{input};
    410     my $exception = $arg_ref->{exception};
    411     my $expected_token_type = $arg_ref->{expected_token_type};
    412     my $follow = $arg_ref->{follow};
    413 
    414     return undef;
    415 }
    416 
    417 sub consume_until {
    418     Readonly my $usage => 'void consume_until(IntStream input, (int token_type | BitSet set))';
    419     croak $usage if @_ != 3;
    420 
    421     if ($_[2]->isa('ANTLR::Runtime::BitSet')) {
    422         my ($self, $input, $set) = @_;
    423 
    424         my $ttype = $input->LA(1);
    425         while ($ttype != ANTLR::Runtime::Token->EOF && !$set->member($ttype)) {
    426             $input->consume();
    427             $ttype = $input->LA(1);
    428         }
    429     } else {
    430         my ($self, $input, $token_type) = @_;
    431 
    432         my $ttype = $input->LA(1);
    433         while ($ttype != ANTLR::Runtime::Token->EOF && $ttype != $token_type) {
    434             $input->consume();
    435             $ttype = $input->LA(1);
    436         }
    437     }
    438 }
    439 
    440 sub push_follow {
    441     Readonly my $usage => 'void push_follow(BitSet fset)';
    442     croak $usage if @_ != 2;
    443     my ($self, $fset) = @_;
    444 
    445     push @{$self->state->following}, $fset;
    446     $self->state->_fsp($self->state->_fsp + 1);
    447 }
    448 
    449 sub get_rule_invocation_stack {
    450     Readonly my $usage => 'List get_rule_invocation_stack()';
    451     croak $usage if @_ != 1;
    452     my ($self) = @_;
    453 
    454     my $rules = [];
    455     for (my $i = 0; ; ++$i) {
    456         my @frame = caller $i;
    457         last if !@frame;
    458 
    459         my ($package, $filename, $line, $subroutine) = @frame;
    460 
    461         if ($package =~ /^ANTLR::Runtime::/) {
    462             next;
    463         }
    464 
    465         if ($subroutine eq NEXT_TOKEN_RULE_NAME) {
    466             next;
    467         }
    468 
    469         if ($package ne ref $self) {
    470             next;
    471         }
    472 
    473         push @{$rules}, $subroutine;
    474     }
    475 }
    476 
    477 sub get_backtracking_level {
    478     Readonly my $usage => 'int get_backtracking_level()';
    479     croak $usage if @_ != 1;
    480     my ($self) = @_;
    481 
    482     return $self->state->backtracking;
    483 }
    484 
    485 sub set_backtracking_level {
    486     my ($self, $n) = @_;
    487     $self->state->backtracking($n);
    488 }
    489 
    490 sub failed {
    491     my ($self) = @_;
    492     return $self->state->failed;
    493 }
    494 
    495 sub get_token_names {
    496     return undef;
    497 }
    498 
    499 sub get_grammar_file_name {
    500     return undef;
    501 }
    502 
    503 sub to_strings {
    504     Readonly my $usage => 'List to_strings(List tokens)';
    505     croak $usage if @_ != 2;
    506     my ($self, $tokens) = @_;
    507 
    508     if (!defined $tokens) {
    509         return undef;
    510     }
    511 
    512     return map { $_->get_text() } @{$tokens};
    513 }
    514 
    515 sub get_rule_memoization {
    516     Readonly my $usage => 'int get_rule_memoization(int rule_index, int rule_start_index)';
    517     croak $usage if @_ != 3;
    518     my ($self, $rule_index, $rule_start_index) = @_;
    519 
    520     if (!defined $self->rule_memo->[$rule_index]) {
    521         $self->rule_memo->[$rule_index] = {};
    522     }
    523 
    524     my $stop_index = $self->state->rule_memo->[$rule_index]->{$rule_start_index};
    525     if (!defined $stop_index) {
    526         return $self->MEMO_RULE_UNKNOWN;
    527     }
    528     return $stop_index;
    529 }
    530 
    531 sub alredy_parsed_rule {
    532     Readonly my $usage => 'boolean alredy_parsed_rule(IntStream input, int rule_index)';
    533     croak $usage if @_ != 3;
    534     my ($self, $input, $rule_index) = @_;
    535 
    536     my $stop_index = $self->get_rule_memoization($rule_index, $input->index());
    537     if ($stop_index == $self->MEMO_RULE_UNKNOWN) {
    538         return 0;
    539     }
    540 
    541     if ($stop_index == $self->MEMO_RULE_FAILED) {
    542         $self->state->failed(1);
    543     } else {
    544         $input->seek($stop_index + 1);
    545     }
    546     return 1;
    547 }
    548 
    549 sub memoize {
    550     Readonly my $usage => 'void memoize(IntStream input, int rule_index, int rule_start_index)';
    551     croak $usage if @_ != 4;
    552     my ($self, $input, $rule_index, $rule_start_index) = @_;
    553 
    554     my $stop_token_index = $self->state->failed ? $self->MEMO_RULE_FAILED : $input->index() - 1;
    555     if (defined $self->state->rule_memo->[$rule_index]) {
    556         $self->state->rule_memo->[$rule_index]->{$rule_start_index} = $stop_token_index;
    557     }
    558 }
    559 
    560 sub get_rule_memoization_cache_size {
    561     Readonly my $usage => 'int get_rule_memoization_cache_size()';
    562     croak $usage if @_ != 1;
    563     my ($self) = @_;
    564 
    565     my $n = 0;
    566     foreach my $m (@{$self->state->rule_memo}) {
    567         $n += keys %{$m} if defined $m;
    568     }
    569 
    570     return $n;
    571 }
    572 
    573 sub trace_in {
    574     Readonly my $usage => 'void trace_in(String rule_name, int rule_index, input_symbol)';
    575     croak $usage if @_ != 4;
    576     my ($self, $rule_name, $rule_index, $input_symbol) = @_;
    577 
    578     print "enter $rule_name $input_symbol";
    579     if ($self->state->failed) {
    580         print ' failed=', $self->state->failed;
    581     }
    582     if ($self->state->backtracking > 0) {
    583         print ' backtracking=', $self->state->backtracking;
    584     }
    585     print "\n";
    586 }
    587 
    588 sub trace_out {
    589     Readonly my $usage => 'void trace_out(String rule_name, int rule_index, input_symbol)';
    590     croak $usage if @_ != 4;
    591     my ($self, $rule_name, $rule_index, $input_symbol) = @_;
    592 
    593     print "exit $rule_name $input_symbol";
    594     if ($self->state->failed) {
    595         print ' failed=', $self->state->failed;
    596     }
    597     if ($self->state->backtracking > 0) {
    598         print ' backtracking=', $self->state->backtracking;
    599     }
    600     print "\n";
    601 }
    602 
    603 no Moose;
    604 __PACKAGE__->meta->make_immutable();
    605 1;
    606 __END__
    607 
    608 =head1 NAME
    609 
    610 ANTLR::Runtime::BaseRecognizer
    611 
    612 =head1 DESCRIPTION
    613 
    614 A generic recognizer that can handle recognizers generated from
    615 lexer, parser, and tree grammars.  This is all the parsing
    616 support code essentially; most of it is error recovery stuff and
    617 backtracking.
    618