1 package ANTLR::Runtime::BaseRecognizer; 2 3 use Readonly; 4 use Carp; 5 6 use ANTLR::Runtime::RecognizerSharedState; 7 use ANTLR::Runtime::Token; 8 use ANTLR::Runtime::UnwantedTokenException; 9 use ANTLR::Runtime::MissingTokenException; 10 use ANTLR::Runtime::MismatchedTokenException; 11 12 use Moose; 13 14 Readonly my $MEMO_RULE_FAILED => -2; 15 sub MEMO_RULE_FAILED { $MEMO_RULE_FAILED } 16 17 Readonly my $MEMO_RULE_UNKNOWN => -1; 18 sub MEMO_RULE_UNKNOWN { $MEMO_RULE_UNKNOWN } 19 20 Readonly my $INITIAL_FOLLOW_STACK_SIZE => 100; 21 sub INITIAL_FOLLOW_STACK_SIZE { $INITIAL_FOLLOW_STACK_SIZE } 22 23 # copies from Token object for convenience in actions 24 Readonly my $DEFAULT_TOKEN_CHANNEL => ANTLR::Runtime::Token->DEFAULT_CHANNEL; 25 sub DEFAULT_TOKEN_CHANNEL { $DEFAULT_TOKEN_CHANNEL } 26 27 Readonly my $HIDDEN => ANTLR::Runtime::Token->HIDDEN_CHANNEL; 28 sub HIDDEN { $HIDDEN } 29 30 Readonly my $NEXT_TOKEN_RULE_NAME => 'next_token'; 31 sub NEXT_TOKEN_RULE_NAME { $NEXT_TOKEN_RULE_NAME } 32 33 # State of a lexer, parser, or tree parser are collected into a state 34 # object so the state can be shared. This sharing is needed to 35 # have one grammar import others and share same error variables 36 # and other state variables. It's a kind of explicit multiple 37 # inheritance via delegation of methods and shared state. 38 has 'state' => ( 39 is => 'rw', 40 isa => 'ANTLR::Runtime::RecognizerSharedState', 41 default => sub { ANTLR::Runtime::RecognizerSharedState->new() }, 42 ); 43 44 sub reset { 45 my ($self) = @_; 46 47 if (!defined $self->state) { 48 return; 49 } 50 51 my $state = $self->state; 52 $state->_fsp(-1); 53 $state->error_recovery(0); 54 $state->last_error_index(-1); 55 $state->failed(0); 56 $state->syntax_errors(0); 57 58 # wack everything related to backtracking and memoization 59 $state->backtracking(0); 60 # wipe cache 61 $state->rule_memo([]); 62 } 63 64 sub match { 65 Readonly my $usage => 'void match(IntStream input, int ttype, BitSet follow)'; 66 croak $usage if @_ != 4; 67 my ($self, $input, $ttype, $follow) = @_; 68 69 my $matched_symbol = $self->get_current_input_symbol($input); 70 if ($input->LA(1) eq $ttype) { 71 $input->consume(); 72 $self->state->error_recovery(0); 73 $self->state->failed(0); 74 return $matched_symbol; 75 } 76 77 if ($self->state->backtracking > 0) { 78 $self->state->failed(1); 79 return $matched_symbol; 80 } 81 82 return $self->recover_from_mismatched_token($input, $ttype, $follow); 83 } 84 85 sub match_any { 86 Readonly my $usage => 'void match_any(IntStream input)'; 87 croak $usage if @_ != 2; 88 my ($self, $input) = @_; 89 90 $self->state->error_recovery(0); 91 $self->state->failed(0); 92 $input->consume(); 93 } 94 95 sub mismatch_is_unwanted_token { 96 my ($self, $input, $ttype) = @_; 97 return $input->LA(2) == $ttype; 98 } 99 100 sub mismatch_is_missing_token { 101 my ($self, $input, $follow) = @_; 102 103 if (!defined $follow) { 104 return 0; 105 } 106 107 if ($follow->member(ANTLR::Runtime::Token->EOR_TOKEN_TYPE)) { 108 my $viable_tokens_following_this_rule = $self->compute_context_sensitive_rule_FOLLOW(); 109 $follow = $follow->or($viable_tokens_following_this_rule); 110 if ($self->state->_fsp >= 0) { 111 $follow->remove(ANTLR::Runtime::Token->EOR_TOKEN_TYPE); 112 } 113 } 114 115 if ($follow->member($input->LA(1)) || $follow->member(ANTLR::Runtime::Token->EOR_TOKEN_TYPE)) { 116 return 1; 117 } 118 return 0; 119 } 120 121 sub mismatch { 122 Readonly my $usage => 'void mismatch(IntStream input, int ttype, BitSet follow)'; 123 croak $usage if @_ != 4; 124 my ($self, $input, $ttype, $follow) = @_; 125 126 if ($self->mismatch_is_unwanted_token($input, $ttype)) { 127 ANTLR::Runtime::UnwantedTokenException->new({ 128 expecting => $ttype, 129 input => $input 130 })->throw(); 131 } 132 elsif ($self->mismatch_is_missing_token($input, $follow)) { 133 ANTLR::Runtime::MissingTokenException->new({ 134 expecting => $ttype, 135 input => $input 136 })->throw(); 137 } 138 else { 139 ANTLR::Runtime::MismatchedTokenException->new({ 140 expecting => $ttype, 141 input => $input 142 })->throw(); 143 } 144 } 145 146 sub report_error { 147 Readonly my $usage => 'void report_error(RecognitionException e)'; 148 croak $usage if @_ != 2; 149 my ($self, $e) = @_; 150 151 if ($self->state->error_recovery) { 152 return; 153 } 154 $self->state->syntax_errors($self->state->syntax_errors + 1); 155 $self->state->error_recovery(1); 156 157 $self->display_recognition_error($self->get_token_names(), $e); 158 return; 159 } 160 161 sub display_recognition_error { 162 Readonly my $usage => 'void display_recognition_error(String[] token_names, RecognitionException e)'; 163 croak $usage if @_ != 3; 164 my ($self, $token_names, $e) = @_; 165 166 my $hdr = $self->get_error_header($e); 167 my $msg = $self->get_error_message($e, $token_names); 168 $self->emit_error_message("$hdr $msg"); 169 } 170 171 sub get_error_message { 172 Readonly my $usage => 'String get_error_message(RecognitionException e, String[] token_names)'; 173 croak $usage if @_ != 3; 174 my ($self, $e, $token_names) = @_; 175 176 if ($e->isa('ANTLR::Runtime::MismatchedTokenException')) { 177 my $token_name; 178 if ($e->get_expecting == ANTLR::Runtime::Token->EOF) { 179 $token_name = 'EOF'; 180 } else { 181 $token_name = $token_names->[$e->get_expecting]; 182 } 183 184 return 'mismatched input ' . $self->get_token_error_display($e->get_token) 185 . ' expecting ' . $token_name; 186 } elsif ($e->isa('ANTLR::Runtime::MismatchedTreeNodeException')) { 187 my $token_name; 188 if ($e->get_expecting == ANTLR::Runtime::Token->EOF) { 189 $token_name = 'EOF'; 190 } else { 191 $token_name = $token_names->[$e->get_expecting]; 192 } 193 194 return 'mismatched tree node: ' . $e->node 195 . ' expecting ' . $token_name; 196 } elsif ($e->isa('ANTLR::Runtime::NoViableAltException')) { 197 return 'no viable alternative at input ' . $self->get_token_error_display($e->get_token); 198 } elsif ($e->isa('ANTLR::Runtime::EarlyExitException')) { 199 return 'required (...)+ loop did not match anything at input ' 200 . get_token_error_display($e->get_token); 201 } elsif ($e->isa('ANTLR::Runtime::MismatchedSetException')) { 202 return 'mismatched input ' . $self->get_token_error_display($e->get_token) 203 . ' expecting set ' . $e->get_expecting; 204 } elsif ($e->isa('ANTLR::Runtime::MismatchedNotSetException')) { 205 return 'mismatched input ' . $self->get_token_error_display($e->get_token) 206 . ' expecting set ' . $e->get_expecting; 207 } elsif ($e->isa('ANTLR::Runtime::FailedPredicateException')) { 208 return 'rule ' . $e->rule_name . ' failed predicate: {' 209 . $e->predicate_text . '}?'; 210 } else { 211 return undef; 212 } 213 } 214 215 sub get_number_of_syntax_errors { 216 my ($self) = @_; 217 return $self->state->syntax_errors; 218 } 219 220 sub get_error_header { 221 Readonly my $usage => 'String get_error_header(RecognitionException e)'; 222 croak $usage if @_ != 2; 223 my ($self, $e) = @_; 224 225 my $line = $e->get_line(); 226 my $col = $e->get_char_position_in_line(); 227 228 return "line $line:$col"; 229 } 230 231 sub get_token_error_display { 232 Readonly my $usage => 'String get_token_error_display(Token t)'; 233 croak $usage if @_ != 2; 234 my ($self, $t) = @_; 235 236 my $s = $t->get_text(); 237 if (!defined $s) { 238 if ($t->get_type() == ANTLR::Runtime::Token->EOF) { 239 $s = '<EOF>'; 240 } else { 241 my $ttype = $t->get_type(); 242 $s = "<$ttype>"; 243 } 244 } 245 246 $s =~ s/\n/\\\\n/g; 247 $s =~ s/\r/\\\\r/g; 248 $s =~ s/\t/\\\\t/g; 249 250 return "'$s'"; 251 } 252 253 sub emit_error_message { 254 Readonly my $usage => 'void emit_error_message(String msg)'; 255 croak $usage if @_ != 2; 256 my ($self, $msg) = @_; 257 258 print STDERR $msg, "\n"; 259 } 260 261 sub recover { 262 Readonly my $usage => 'void recover(IntStream input, RecognitionException re)'; 263 croak $usage if @_ != 3; 264 my ($self, $input, $re) = @_; 265 266 if ($self->state->last_error_index == $input->index()) { 267 # uh oh, another error at same token index; must be a case 268 # where LT(1) is in the recovery token set so nothing is 269 # consumed; consume a single token so at least to prevent 270 # an infinite loop; this is a failsafe. 271 $input->consume(); 272 } 273 274 $self->state->last_error_index($input->index()); 275 my $follow_set = $self->compute_error_recovery_set(); 276 $self->begin_resync(); 277 $self->consume_until($input, $follow_set); 278 $self->end_resync(); 279 } 280 281 sub begin_resync { 282 } 283 284 sub end_resync { 285 } 286 287 sub compute_error_recovery_set { 288 Readonly my $usage => 'void compute_error_recovery_set()'; 289 croak $usage if @_ != 1; 290 my ($self) = @_; 291 292 $self->combine_follows(0); 293 } 294 295 sub compute_context_sensitive_rule_FOLLOW { 296 Readonly my $usage => 'void compute_context_sensitive_rule_FOLLOW()'; 297 croak $usage if @_ != 1; 298 my ($self) = @_; 299 300 $self->combine_follows(1); 301 } 302 303 sub combine_follows { 304 Readonly my $usage => 'BitSet combine_follows(boolean exact)'; 305 croak $usage if @_ != 2; 306 my ($self, $exact) = @_; 307 308 my $top = $self->state->_fsp; 309 my $follow_set = ANTLR::Runtime::BitSet->new(); 310 311 foreach my $local_follow_set (reverse @{$self->state->following}) { 312 $follow_set |= $local_follow_set; 313 if ($exact && $local_follow_set->member(ANTLR::Runtime::Token->EOR_TOKEN_TYPE)) { 314 last; 315 } 316 } 317 #$follow_set->remove(ANTLR::Runtime::Token->EOR_TOKEN_TYPE); 318 return $follow_set; 319 } 320 321 sub recover_from_mismatched_token { 322 Readonly my $usage => 'void recover_from_mismatched_token(IntStream input, int ttype, BitSet follow)'; 323 croak $usage if @_ != 4; 324 my ($self, $input, $ttype, $follow) = @_; 325 326 if ($self->mismatch_is_unwanted_token($input, $ttype)) { 327 my $ex = ANTLR::Runtime::UnwantedTokenException->new({ 328 expecting => $ttype, 329 input => $input 330 }); 331 332 $self->begin_resync(); 333 $input->consume(); 334 $self->end_resync(); 335 $self->report_error($ex); 336 337 my $matched_symbol = $self->get_current_input_symbol($input); 338 $input->consume(); 339 return $matched_symbol; 340 } 341 342 if ($self->mismatch_is_missing_token($input, $follow)) { 343 my $inserted = $self->get_missing_symbol({ 344 input => $input, 345 expected_token_type => $ttype, 346 follow => $follow, 347 }); 348 my $ex = ANTLR::Runtime::MissingTokenException({ 349 expecting => $ttype, 350 input => $input, 351 inserted => $inserted, 352 }); 353 $self->report_error($ex); 354 return $inserted; 355 } 356 357 ANTLR::Runtime::MismatchedTokenException->new({ 358 expecting => $ttype, 359 input => $input, 360 })->throw(); 361 } 362 363 sub recover_from_mismatched_set { 364 Readonly my $usage => 'void recover_from_mismatched_set(IntStream input, RecognitionException e, BitSet follow)'; 365 croak $usage if @_ != 4; 366 my ($self, $input, $e, $follow) = @_; 367 368 if ($self->mismatch_is_missing_token($input, $follow)) { 369 $self->report_error($e); 370 return $self->get_missing_symbol({ 371 input => $input, 372 exception => $e, 373 expected_token_type => ANTLR::Runtime::Token->INVALID_TOKEN_TYPE, 374 follow => $follow, 375 }); 376 } 377 378 $e->throw(); 379 } 380 381 sub recover_from_mismatched_element { 382 Readonly my $usage => 'boolean recover_from_mismatched_element(IntStream input, RecognitionException e, BitSet follow)'; 383 croak $usage if @_ != 4; 384 my ($self, $input, $e, $follow) = @_; 385 386 return 0 if (!defined $follow); 387 388 if ($follow->member(ANTLR::Runtime::Token->EOR_TOKEN_TYPE)) { 389 my $viable_tokens_following_this_rule = $self->compute_context_sensitive_rule_FOLLOW(); 390 $follow |= $viable_tokens_following_this_rule; 391 $follow->remove(ANTLR::Runtime::Token->EOR_TOKEN_TYPE); 392 } 393 394 if ($follow->member($input->LA(1))) { 395 $self->report_error($e); 396 return 1; 397 } 398 399 return 0; 400 } 401 402 sub get_current_input_symbol { 403 my ($self, $input) = @_; 404 return undef; 405 } 406 407 sub get_missing_symbol { 408 my ($self, $arg_ref) = @_; 409 my $input = $arg_ref->{input}; 410 my $exception = $arg_ref->{exception}; 411 my $expected_token_type = $arg_ref->{expected_token_type}; 412 my $follow = $arg_ref->{follow}; 413 414 return undef; 415 } 416 417 sub consume_until { 418 Readonly my $usage => 'void consume_until(IntStream input, (int token_type | BitSet set))'; 419 croak $usage if @_ != 3; 420 421 if ($_[2]->isa('ANTLR::Runtime::BitSet')) { 422 my ($self, $input, $set) = @_; 423 424 my $ttype = $input->LA(1); 425 while ($ttype != ANTLR::Runtime::Token->EOF && !$set->member($ttype)) { 426 $input->consume(); 427 $ttype = $input->LA(1); 428 } 429 } else { 430 my ($self, $input, $token_type) = @_; 431 432 my $ttype = $input->LA(1); 433 while ($ttype != ANTLR::Runtime::Token->EOF && $ttype != $token_type) { 434 $input->consume(); 435 $ttype = $input->LA(1); 436 } 437 } 438 } 439 440 sub push_follow { 441 Readonly my $usage => 'void push_follow(BitSet fset)'; 442 croak $usage if @_ != 2; 443 my ($self, $fset) = @_; 444 445 push @{$self->state->following}, $fset; 446 $self->state->_fsp($self->state->_fsp + 1); 447 } 448 449 sub get_rule_invocation_stack { 450 Readonly my $usage => 'List get_rule_invocation_stack()'; 451 croak $usage if @_ != 1; 452 my ($self) = @_; 453 454 my $rules = []; 455 for (my $i = 0; ; ++$i) { 456 my @frame = caller $i; 457 last if !@frame; 458 459 my ($package, $filename, $line, $subroutine) = @frame; 460 461 if ($package =~ /^ANTLR::Runtime::/) { 462 next; 463 } 464 465 if ($subroutine eq NEXT_TOKEN_RULE_NAME) { 466 next; 467 } 468 469 if ($package ne ref $self) { 470 next; 471 } 472 473 push @{$rules}, $subroutine; 474 } 475 } 476 477 sub get_backtracking_level { 478 Readonly my $usage => 'int get_backtracking_level()'; 479 croak $usage if @_ != 1; 480 my ($self) = @_; 481 482 return $self->state->backtracking; 483 } 484 485 sub set_backtracking_level { 486 my ($self, $n) = @_; 487 $self->state->backtracking($n); 488 } 489 490 sub failed { 491 my ($self) = @_; 492 return $self->state->failed; 493 } 494 495 sub get_token_names { 496 return undef; 497 } 498 499 sub get_grammar_file_name { 500 return undef; 501 } 502 503 sub to_strings { 504 Readonly my $usage => 'List to_strings(List tokens)'; 505 croak $usage if @_ != 2; 506 my ($self, $tokens) = @_; 507 508 if (!defined $tokens) { 509 return undef; 510 } 511 512 return map { $_->get_text() } @{$tokens}; 513 } 514 515 sub get_rule_memoization { 516 Readonly my $usage => 'int get_rule_memoization(int rule_index, int rule_start_index)'; 517 croak $usage if @_ != 3; 518 my ($self, $rule_index, $rule_start_index) = @_; 519 520 if (!defined $self->rule_memo->[$rule_index]) { 521 $self->rule_memo->[$rule_index] = {}; 522 } 523 524 my $stop_index = $self->state->rule_memo->[$rule_index]->{$rule_start_index}; 525 if (!defined $stop_index) { 526 return $self->MEMO_RULE_UNKNOWN; 527 } 528 return $stop_index; 529 } 530 531 sub alredy_parsed_rule { 532 Readonly my $usage => 'boolean alredy_parsed_rule(IntStream input, int rule_index)'; 533 croak $usage if @_ != 3; 534 my ($self, $input, $rule_index) = @_; 535 536 my $stop_index = $self->get_rule_memoization($rule_index, $input->index()); 537 if ($stop_index == $self->MEMO_RULE_UNKNOWN) { 538 return 0; 539 } 540 541 if ($stop_index == $self->MEMO_RULE_FAILED) { 542 $self->state->failed(1); 543 } else { 544 $input->seek($stop_index + 1); 545 } 546 return 1; 547 } 548 549 sub memoize { 550 Readonly my $usage => 'void memoize(IntStream input, int rule_index, int rule_start_index)'; 551 croak $usage if @_ != 4; 552 my ($self, $input, $rule_index, $rule_start_index) = @_; 553 554 my $stop_token_index = $self->state->failed ? $self->MEMO_RULE_FAILED : $input->index() - 1; 555 if (defined $self->state->rule_memo->[$rule_index]) { 556 $self->state->rule_memo->[$rule_index]->{$rule_start_index} = $stop_token_index; 557 } 558 } 559 560 sub get_rule_memoization_cache_size { 561 Readonly my $usage => 'int get_rule_memoization_cache_size()'; 562 croak $usage if @_ != 1; 563 my ($self) = @_; 564 565 my $n = 0; 566 foreach my $m (@{$self->state->rule_memo}) { 567 $n += keys %{$m} if defined $m; 568 } 569 570 return $n; 571 } 572 573 sub trace_in { 574 Readonly my $usage => 'void trace_in(String rule_name, int rule_index, input_symbol)'; 575 croak $usage if @_ != 4; 576 my ($self, $rule_name, $rule_index, $input_symbol) = @_; 577 578 print "enter $rule_name $input_symbol"; 579 if ($self->state->failed) { 580 print ' failed=', $self->state->failed; 581 } 582 if ($self->state->backtracking > 0) { 583 print ' backtracking=', $self->state->backtracking; 584 } 585 print "\n"; 586 } 587 588 sub trace_out { 589 Readonly my $usage => 'void trace_out(String rule_name, int rule_index, input_symbol)'; 590 croak $usage if @_ != 4; 591 my ($self, $rule_name, $rule_index, $input_symbol) = @_; 592 593 print "exit $rule_name $input_symbol"; 594 if ($self->state->failed) { 595 print ' failed=', $self->state->failed; 596 } 597 if ($self->state->backtracking > 0) { 598 print ' backtracking=', $self->state->backtracking; 599 } 600 print "\n"; 601 } 602 603 no Moose; 604 __PACKAGE__->meta->make_immutable(); 605 1; 606 __END__ 607 608 =head1 NAME 609 610 ANTLR::Runtime::BaseRecognizer 611 612 =head1 DESCRIPTION 613 614 A generic recognizer that can handle recognizers generated from 615 lexer, parser, and tree grammars. This is all the parsing 616 support code essentially; most of it is error recovery stuff and 617 backtracking. 618