unit Antlr.Runtime;
(*
[The "BSD licence"]
Copyright (c) 2008 Erik van Bilsen
Copyright (c) 2005-2007 Kunle Odutola
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code MUST RETAIN the above copyright
   notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form MUST REPRODUCE the above copyright
   notice, this list of conditions and the following disclaimer in
   the documentation and/or other materials provided with the
   distribution.
3. The name of the author may not be used to endorse or promote products
   derived from this software without specific prior WRITTEN permission.
4. Unless explicitly state otherwise, any contribution intentionally
   submitted for inclusion in this work to the copyright owner or licensor
   shall be under the terms and conditions of this license, without any
   additional terms or conditions.

THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*)

interface

{$IF CompilerVersion < 20}
{$MESSAGE ERROR 'You need Delphi 2009 or higher to use the Antlr runtime'}
{$IFEND}

uses
  SysUtils,
  Classes,
  Generics.Defaults,
  Generics.Collections,
  Antlr.Runtime.Tools,
  Antlr.Runtime.Collections;

type
  TCharStreamConstants = (cscEOF = -1);

type
  ERecognitionException = class;
  ENoViableAltException = class;

  /// <summary>
  /// A simple stream of integers. This is useful when all we care about is the char
  /// or token type sequence (such as for interpretation).
  /// </summary>
  IIntStream = interface(IANTLRInterface)
  ['{6B851BDB-DD9C-422B-AD1E-567E52D2654F}']
    { Property accessors }
    function GetSourceName: String;

    { Methods }
    /// <summary>
    /// Advances the read position of the stream. Updates line and column state
    /// </summary>
    procedure Consume;

    /// <summary>
    /// Get int at current input pointer + I ahead (where I=1 is next int)
    /// Negative indexes are allowed. LA(-1) is previous token (token just matched).
    /// LA(-i) where i is before first token should yield -1, invalid char or EOF.
    /// </summary>
    function LA(I: Integer): Integer;
    function LAChar(I: Integer): Char;

    /// <summary>Tell the stream to start buffering if it hasn't already.</summary>
    /// <remarks>
    /// Executing Rewind(Mark()) on a stream should not affect the input position.
    /// The Lexer tracks line/col info as well as input index so its markers are
    /// not pure input indexes. Same for tree node streams.
    /// </remarks>
    /// <returns>Return a marker that can be passed to
    /// <see cref="IIntStream.Rewind(Integer)"/> to return to the current position.
    /// This could be the current input position, a value returned from
    /// <see cref="IIntStream.Index"/>, or some other marker.</returns>
    function Mark: Integer;

    /// <summary>
    /// Return the current input symbol index 0..N where N indicates the
    /// last symbol has been read. The index is the symbol about to be
    /// read, not the most recently read symbol.
    /// </summary>
    function Index: Integer;

    /// <summary>
    /// Resets the stream so that the next call to
    /// <see cref="IIntStream.Index"/> would return marker.
    /// </summary>
    /// <remarks>
    /// The marker will usually be <see cref="IIntStream.Index"/> but
    /// it doesn't have to be. It's just a marker to indicate what
    /// state the stream was in. This is essentially calling
    /// <see cref="IIntStream.Release"/> and <see cref="IIntStream.Seek"/>.
    /// If there are other markers created after the specified marker,
    /// this routine must unroll them like a stack. Assumes the state the
    /// stream was in when this marker was created.
    /// </remarks>
    procedure Rewind(const Marker: Integer); overload;

    /// <summary>
    /// Rewind to the input position of the last marker.
    /// </summary>
    /// <remarks>
    /// Used currently only after a cyclic DFA and just before starting
    /// a sem/syn predicate to get the input position back to the start
    /// of the decision. Do not "pop" the marker off the state. Mark(I)
    /// and Rewind(I) should balance still. It is like invoking
    /// Rewind(last marker) but it should not "pop" the marker off.
    /// It's like Seek(last marker's input position).
    /// </remarks>
    procedure Rewind; overload;

    /// <summary>
    /// You may want to commit to a backtrack but don't want to force the
    /// stream to keep bookkeeping objects around for a marker that is
    /// no longer necessary. This will have the same behavior as
    /// <see cref="IIntStream.Rewind(Integer)"/> except it releases resources without
    /// the backward seek.
    /// </summary>
    /// <remarks>
    /// This must throw away resources for all markers back to the marker
    /// argument. So if you are nested 5 levels deep in Mark() and then call
    /// Release(2), you have to release resources for depths 2..5.
    /// </remarks>
    procedure Release(const Marker: Integer);

    /// <summary>
    /// Set the input cursor to the position indicated by index. This is
    /// normally used to seek ahead in the input stream.
    /// </summary>
    /// <remarks>
    /// No buffering is required to do this unless you know your stream
    /// will use seek to move backwards such as when backtracking.
    ///
    /// This is different from rewind in its multi-directional requirement
    /// and in that its argument is strictly an input cursor (index).
    ///
    /// For char streams, seeking forward must update the stream state such
    /// as line number. For seeking backwards, you will be presumably
    /// backtracking using the
    /// <see cref="IIntStream.Mark"/>/<see cref="IIntStream.Rewind(Integer)"/>
    /// mechanism that restores state and so this method does not need to
    /// update state when seeking backwards.
    ///
    /// Currently, this method is only used for efficient backtracking using
    /// memoization, but in the future it may be used for incremental parsing.
    ///
    /// The index is 0..N-1. A seek to position i means that LA(1) will return
    /// the ith symbol. So, seeking to 0 means LA(1) will return the first
    /// element in the stream.
    /// </remarks>
    procedure Seek(const Index: Integer);

    /// <summary>Returns the size of the entire stream.</summary>
    /// <remarks>
    /// Only makes sense for streams that buffer everything up probably,
    /// but might be useful to display the entire stream or for testing.
    /// This value includes a single EOF.
    /// </remarks>
    function Size: Integer;

    { Properties }

    /// <summary>
    /// Where are you getting symbols from? Normally, implementations will
    /// pass the buck all the way to the lexer who can ask its input stream
    /// for the file name or whatever.
    /// </summary>
    property SourceName: String read GetSourceName;
  end;

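  { Example (illustrative sketch only): the typical Mark/Rewind/Release
    pattern for speculative scanning against an IIntStream. The stream
    variable stands for any concrete implementation; the character value
    tested here is arbitrary.

      var
        Stream: IIntStream;
        Marker: Integer;
      begin
        Marker := Stream.Mark;      // remember the current position
        Stream.Consume;             // scan ahead speculatively
        if Stream.LA(1) = Ord('=') then
          Stream.Rewind(Marker)     // backtrack: restore position and state
        else
          Stream.Release(Marker);   // commit: drop bookkeeping, keep position
      end; }
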
  /// <summary>A source of characters for an ANTLR lexer </summary>
  ICharStream = interface(IIntStream)
  ['{C30EF0DB-F4BD-4CBC-8C8F-828DABB6FF36}']
    { Property accessors }
    function GetLine: Integer;
    procedure SetLine(const Value: Integer);
    function GetCharPositionInLine: Integer;
    procedure SetCharPositionInLine(const Value: Integer);

    { Methods }

    /// <summary>
    /// Get the ith character of lookahead. This is usually the same as
    /// LA(I). This will be used for labels in the generated lexer code.
    /// I'd prefer to return a char here type-wise, but it's probably
    /// better to be 32-bit clean and be consistent with LA.
    /// </summary>
    function LT(const I: Integer): Integer;

    /// <summary>
    /// This is primarily a useful interface for action code (just make sure
    /// actions don't use this on streams that don't support it).
    /// For infinite streams, you don't need this.
    /// </summary>
    function Substring(const Start, Stop: Integer): String;

    { Properties }

    /// <summary>
    /// The current line in the character stream (ANTLR tracks the
    /// line information automatically). To support rewinding character
    /// streams, we are able to [re-]set the line.
    /// </summary>
    property Line: Integer read GetLine write SetLine;

    /// <summary>
    /// The index of the character relative to the beginning of the
    /// line (0..N-1). To support rewinding character streams, we are
    /// able to [re-]set the character position.
    /// </summary>
    property CharPositionInLine: Integer read GetCharPositionInLine write SetCharPositionInLine;
  end;

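  { Example (sketch): action code pulling matched text back out of an
    ICharStream and reporting where it starts. StartIx and StopIx are
    hypothetical indexes (e.g. a token's start/stop character indexes).

      var
        Chars: ICharStream;
        StartIx, StopIx: Integer;
      begin
        WriteLn(Format('line %d, col %d: %s',
          [Chars.Line, Chars.CharPositionInLine, Chars.Substring(StartIx, StopIx)]));
      end; }
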
  IToken = interface(IANTLRInterface)
  ['{73BF129C-2F45-4C68-838E-BF5D3536AC6D}']
    { Property accessors }
    function GetTokenType: Integer;
    procedure SetTokenType(const Value: Integer);
    function GetLine: Integer;
    procedure SetLine(const Value: Integer);
    function GetCharPositionInLine: Integer;
    procedure SetCharPositionInLine(const Value: Integer);
    function GetChannel: Integer;
    procedure SetChannel(const Value: Integer);
    function GetTokenIndex: Integer;
    procedure SetTokenIndex(const Value: Integer);
    function GetText: String;
    procedure SetText(const Value: String);

    { Properties }
    property TokenType: Integer read GetTokenType write SetTokenType;

    /// <summary>The line number on which this token was matched; line=1..N</summary>
    property Line: Integer read GetLine write SetLine;

    /// <summary>
    /// The index of the first character relative to the beginning of the line 0..N-1
    /// </summary>
    property CharPositionInLine: Integer read GetCharPositionInLine write SetCharPositionInLine;

    /// <summary>The channel this token is on</summary>
    property Channel: Integer read GetChannel write SetChannel;

    /// <summary>
    /// An index from 0..N-1 of the token object in the input stream
    /// </summary>
    /// <remarks>
    /// This must be valid in order to use the ANTLRWorks debugger.
    /// </remarks>
    property TokenIndex: Integer read GetTokenIndex write SetTokenIndex;

    /// <summary>The text of the token</summary>
    /// <remarks>
    /// When setting the text, it might be a NOP such as for the CommonToken,
    /// which doesn't have string pointers, just indexes into a char buffer.
    /// </remarks>
    property Text: String read GetText write SetText;
  end;

  /// <summary>
  /// A source of tokens must provide a sequence of tokens via NextToken()
  /// and also must reveal its source of characters; CommonToken's text is
  /// computed from a CharStream; it only stores indices into the char stream.
  ///
  /// Errors from the lexer are never passed to the parser. Either you want
  /// to keep going or you do not upon token recognition error. If you do not
  /// want to continue lexing then you do not want to continue parsing. Just
  /// throw an exception not derived from RecognitionException and Delphi will
  /// naturally toss you all the way out of the recognizers. If you want to
  /// continue lexing then you should not throw an exception to the parser--it
  /// has already requested a token. Keep lexing until you get a valid one.
  /// Just report errors and keep going, looking for a valid token.
  /// </summary>
  ITokenSource = interface(IANTLRInterface)
  ['{2C71FAD0-AEEE-417D-B576-4059F7C4CEB4}']
    { Property accessors }
    function GetSourceName: String;

    { Methods }

    /// <summary>
    /// Returns a Token object from the input stream (usually a CharStream).
    /// Does not fail/return upon lexing error; just keeps chewing on the
    /// characters until it gets a good one; errors are not passed through
    /// to the parser.
    /// </summary>
    function NextToken: IToken;

    { Properties }

    /// <summary>
    /// Where are you getting tokens from? Normally, the implementation will
    /// simply ask the lexer's input stream.
    /// </summary>
    property SourceName: String read GetSourceName;
  end;

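  { Example (sketch): draining an ITokenSource until EOF. Comparing the token
    type against cscEOF (-1) follows the usual ANTLR v3 convention; adjust to
    the constants your generated lexer defines.

      var
        Source: ITokenSource;   // e.g. a generated lexer instance
        Tok: IToken;
      begin
        Tok := Source.NextToken;
        while Tok.TokenType <> Ord(cscEOF) do
        begin
          WriteLn(Format('%d:%d type=%d text=%s',
            [Tok.Line, Tok.CharPositionInLine, Tok.TokenType, Tok.Text]));
          Tok := Source.NextToken;
        end;
      end; }
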
  /// <summary>A stream of tokens accessing tokens from a TokenSource </summary>
  ITokenStream = interface(IIntStream)
  ['{59E5B39D-31A6-496D-9FA9-AC75CC584B68}']
    { Property accessors }
    function GetTokenSource: ITokenSource;
    procedure SetTokenSource(const Value: ITokenSource);

    { Methods }

    /// <summary>
    /// Get Token at current input pointer + I ahead (where I=1 is next
    /// Token).
    /// I < 0 indicates tokens in the past. So -1 is previous token and -2 is
    /// two tokens ago. LT(0) is undefined. For I>=N, return Token.EOFToken.
    /// Return null for LT(0) and any index that results in an absolute address
    /// that is negative.
    /// </summary>
    function LT(const K: Integer): IToken;

    /// <summary>
    /// Get a token at an absolute index I; 0..N-1. This is really only
    /// needed for profiling and debugging and token stream rewriting.
    /// If you don't want to buffer up tokens, then this method makes no
    /// sense for you. Naturally you can't use the rewrite stream feature.
    /// I believe DebugTokenStream can easily be altered to not use
    /// this method, removing the dependency.
    /// </summary>
    function Get(const I: Integer): IToken;

    /// <summary>Return the text of all tokens from start to stop, inclusive.
    /// If the stream does not buffer all the tokens then it can just
    /// return ''; Users should not access $ruleLabel.text in
    /// an action of course in that case.
    /// </summary>
    function ToString(const Start, Stop: Integer): String; overload;

    /// <summary>Because the user is not required to use a token with an index stored
    /// in it, we must provide a means for two token objects themselves to
    /// indicate the start/end location. Most often this will just delegate
    /// to the other ToString(Integer,Integer). This is also parallel with
    /// the TreeNodeStream.ToString(Object,Object).
    /// </summary>
    function ToString(const Start, Stop: IToken): String; overload;

    { Properties }
    property TokenSource: ITokenSource read GetTokenSource write SetTokenSource;
  end;

  /// <summary>
  /// This is the complete state of a stream.
  ///
  /// When walking ahead with cyclic DFA for syntactic predicates, we
  /// need to record the state of the input stream (char index, line,
  /// etc...) so that we can rewind the state after scanning ahead.
  /// </summary>
  ICharStreamState = interface(IANTLRInterface)
  ['{62D2A1CD-ED3A-4C95-A366-AB8F2E54060B}']
    { Property accessors }
    function GetP: Integer;
    procedure SetP(const Value: Integer);
    function GetLine: Integer;
    procedure SetLine(const Value: Integer);
    function GetCharPositionInLine: Integer;
    procedure SetCharPositionInLine(const Value: Integer);

    { Properties }
    /// <summary>Index into the char stream of next lookahead char </summary>
    property P: Integer read GetP write SetP;

    /// <summary>What line number is the scanner at before processing buffer[P]? </summary>
    property Line: Integer read GetLine write SetLine;

    /// <summary>What char position 0..N-1 in line is scanner before processing buffer[P]? </summary>
    property CharPositionInLine: Integer read GetCharPositionInLine write SetCharPositionInLine;
  end;

  /// <summary>
  /// A pretty quick <see cref="ICharStream"/> that uses a character array
  /// directly as its underlying source.
  /// </summary>
  IANTLRStringStream = interface(ICharStream)
  ['{2FA24299-FF97-4AB6-8CA6-5D3DA13C4AB2}']
    { Methods }

    /// <summary>
    /// Resets the stream so that it is in the same state it was
    /// when the object was created *except* the data array is not
    /// touched.
    /// </summary>
    procedure Reset;

  end;

  /// <summary>
  /// A character stream - an <see cref="ICharStream"/> - that loads
  /// and caches the contents of its underlying file fully during
  /// object construction
  /// </summary>
  /// <remarks>
  /// This looks very much like an ANTLReaderStream or an ANTLRInputStream
  /// but it is a special case. Since we know the exact size of the file to
  /// load, we can avoid lots of data copying and buffer resizing.
  /// </remarks>
  IANTLRFileStream = interface(IANTLRStringStream)
  ['{2B0145DB-2DAA-48A0-8316-B47A69EDDD1A}']
    { Methods }

    /// <summary>
    /// Loads and buffers the specified file to be used as this
    /// ANTLRFileStream's source
    /// </summary>
    /// <param name="FileName">File to load</param>
    /// <param name="Encoding">Encoding to apply to file</param>
    procedure Load(const FileName: String; const Encoding: TEncoding);
  end;

  /// <summary>
  /// A stripped-down version of org.antlr.misc.BitSet that is just
  /// good enough to handle runtime requirements such as FOLLOW sets
  /// for automatic error recovery.
  /// </summary>
  IBitSet = interface(IANTLRInterface)
  ['{F2045045-FC46-4779-A65D-56C65D257A8E}']
    { Property accessors }
    function GetIsNil: Boolean;

    { Methods }

    /// <summary>return "this or a" in a new set </summary>
    function BitSetOr(const A: IBitSet): IBitSet;

    /// <summary>Or this element into this set (grow as necessary to accommodate)</summary>
    procedure Add(const El: Integer);

    /// <summary> Grows the set to a larger number of bits.</summary>
    /// <param name="bit">element that must fit in set
    /// </param>
    procedure GrowToInclude(const Bit: Integer);

    procedure OrInPlace(const A: IBitSet);
    function Size: Integer;
    function Member(const El: Integer): Boolean;

    // remove this element from this set
    procedure Remove(const El: Integer);

    function NumBits: Integer;

    /// <summary>return how much space is being used by the bits array, not
    /// how many actually have member bits on.
    /// </summary>
    function LengthInLongWords: Integer;

    function ToArray: TIntegerArray;
    function ToPackedArray: TUInt64Array;

    function ToString: String; overload;
    function ToString(const TokenNames: TStringArray): String; overload;
    function Equals(Obj: TObject): Boolean;

    { Properties }
    property IsNil: Boolean read GetIsNil;
  end;
  TBitSetArray = array of IBitSet;

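  { Example (sketch): building a small FOLLOW set and testing membership using
    only the IBitSet operations above. How the instance is constructed depends
    on the concrete bit-set class this unit provides; TokSemi, TokRParen,
    CurrentTokenType and Resync are hypothetical names.

      var
        FollowSet: IBitSet;
      begin
        // FollowSet := ...;                  // obtain from the implementation class
        FollowSet.Add(TokSemi);               // grows the set as needed
        FollowSet.Add(TokRParen);
        if FollowSet.Member(CurrentTokenType) then
          Resync;                             // token may legally follow the rule
      end; }
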
  /// <summary>
  /// The set of fields needed by an abstract recognizer to recognize input
  /// and recover from errors
  /// </summary>
  /// <remarks>
  /// As a separate state object, it can be shared among multiple grammars;
  /// e.g., when one grammar imports another.
  /// These fields are publicly visible but the actual state pointer per
  /// parser is protected.
  /// </remarks>
  IRecognizerSharedState = interface(IANTLRInterface)
  ['{6CB6E17A-0B01-4AA7-8D49-5742A3CB8901}']
    { Property accessors }
    function GetFollowing: TBitSetArray;
    procedure SetFollowing(const Value: TBitSetArray);
    function GetFollowingStackPointer: Integer;
    procedure SetFollowingStackPointer(const Value: Integer);
    function GetErrorRecovery: Boolean;
    procedure SetErrorRecovery(const Value: Boolean);
    function GetLastErrorIndex: Integer;
    procedure SetLastErrorIndex(const Value: Integer);
    function GetFailed: Boolean;
    procedure SetFailed(const Value: Boolean);
    function GetSyntaxErrors: Integer;
    procedure SetSyntaxErrors(const Value: Integer);
    function GetBacktracking: Integer;
    procedure SetBacktracking(const Value: Integer);
    function GetRuleMemo: TDictionaryArray<Integer, Integer>;
    function GetRuleMemoCount: Integer;
    procedure SetRuleMemoCount(const Value: Integer);
    function GetToken: IToken;
    procedure SetToken(const Value: IToken);
    function GetTokenStartCharIndex: Integer;
    procedure SetTokenStartCharIndex(const Value: Integer);
    function GetTokenStartLine: Integer;
    procedure SetTokenStartLine(const Value: Integer);
    function GetTokenStartCharPositionInLine: Integer;
    procedure SetTokenStartCharPositionInLine(const Value: Integer);
    function GetChannel: Integer;
    procedure SetChannel(const Value: Integer);
    function GetTokenType: Integer;
    procedure SetTokenType(const Value: Integer);
    function GetText: String;
    procedure SetText(const Value: String);

    { Properties }

    /// <summary>
    /// Tracks the set of token types that can follow any rule invocation.
    /// Stack grows upwards. When it hits the max, it grows 2x in size
    /// and keeps going.
    /// </summary>
    property Following: TBitSetArray read GetFollowing write SetFollowing;
    property FollowingStackPointer: Integer read GetFollowingStackPointer write SetFollowingStackPointer;

    /// <summary>
    /// This is true when we see an error and before having successfully
    /// matched a token. Prevents generation of more than one error message
    /// per error.
    /// </summary>
    property ErrorRecovery: Boolean read GetErrorRecovery write SetErrorRecovery;

    /// <summary>
    /// The index into the input stream where the last error occurred.
    /// </summary>
    /// <remarks>
    /// This is used to prevent infinite loops where an error is found
    /// but no token is consumed during recovery...another error is found,
    /// ad nauseam. This is a failsafe mechanism to guarantee that at least
    /// one token/tree node is consumed for two errors.
    /// </remarks>
    property LastErrorIndex: Integer read GetLastErrorIndex write SetLastErrorIndex;

    /// <summary>
    /// In lieu of a return value, this indicates that a rule or token
    /// has failed to match. Reset to false upon valid token match.
    /// </summary>
    property Failed: Boolean read GetFailed write SetFailed;

    /// <summary>
    /// Did the recognizer encounter a syntax error? Track how many.
    /// </summary>
    property SyntaxErrors: Integer read GetSyntaxErrors write SetSyntaxErrors;

    /// <summary>
    /// If 0, no backtracking is going on. Safe to exec actions etc...
    /// If >0 then it's the level of backtracking.
    /// </summary>
    property Backtracking: Integer read GetBacktracking write SetBacktracking;

    /// <summary>
    /// An array[size num rules] of Map<Integer,Integer> that tracks
    /// the stop token index for each rule.
    /// </summary>
    /// <remarks>
    /// RuleMemo[RuleIndex] is the memoization table for RuleIndex.
    /// For key RuleStartIndex, you get back the stop token for
    /// associated rule or MEMO_RULE_FAILED.
    ///
    /// This is only used if rule memoization is on (which it is by default).
    /// </remarks>
    property RuleMemo: TDictionaryArray<Integer, Integer> read GetRuleMemo;
    property RuleMemoCount: Integer read GetRuleMemoCount write SetRuleMemoCount;

    // Lexer Specific Members
    // LEXER FIELDS (must be in same state object to avoid casting
    // constantly in generated code and Lexer object) :(

    /// <summary>
    /// Token object normally returned by NextToken() after matching lexer rules.
    /// </summary>
    /// <remarks>
    /// The goal of all lexer rules/methods is to create a token object.
    /// This is an instance variable as multiple rules may collaborate to
    /// create a single token. NextToken will return this object after
    /// matching lexer rule(s). If you subclass to allow multiple token
    /// emissions, then set this to the last token to be matched or
    /// something nonnull so that the auto token emit mechanism will not
    /// emit another token.
    /// </remarks>
    property Token: IToken read GetToken write SetToken;

    /// <summary>
    /// What character index in the stream did the current token start at?
    /// </summary>
    /// <remarks>
    /// Needed, for example, to get the text for current token. Set at
    /// the start of nextToken.
    /// </remarks>
    property TokenStartCharIndex: Integer read GetTokenStartCharIndex write SetTokenStartCharIndex;

    /// <summary>
    /// The line on which the first character of the token resides
    /// </summary>
    property TokenStartLine: Integer read GetTokenStartLine write SetTokenStartLine;

    /// <summary>The character position of first character within the line</summary>
    property TokenStartCharPositionInLine: Integer read GetTokenStartCharPositionInLine write SetTokenStartCharPositionInLine;

    /// <summary>The channel number for the current token</summary>
    property Channel: Integer read GetChannel write SetChannel;

    /// <summary>The token type for the current token</summary>
    property TokenType: Integer read GetTokenType write SetTokenType;

    /// <summary>
    /// You can set the text for the current token to override what is in
    /// the input char buffer. Use SetText() or set this instance variable directly.
    /// </summary>
    property Text: String read GetText write SetText;
  end;

  ICommonToken = interface(IToken)
  ['{06B1B0C3-2A0D-477A-AE30-414F51ACE8A0}']
    { Property accessors }
    function GetStartIndex: Integer;
    procedure SetStartIndex(const Value: Integer);
    function GetStopIndex: Integer;
    procedure SetStopIndex(const Value: Integer);
    function GetInputStream: ICharStream;
    procedure SetInputStream(const Value: ICharStream);

    { Methods }
    function ToString: String;

    { Properties }
    property StartIndex: Integer read GetStartIndex write SetStartIndex;
    property StopIndex: Integer read GetStopIndex write SetStopIndex;
    property InputStream: ICharStream read GetInputStream write SetInputStream;
  end;

  /// <summary>
  /// A Token object like we'd use in ANTLR 2.x; has an actual string created
  /// and associated with this object. These objects are needed for imaginary
  /// tree nodes that have payload objects. We need to create a Token object
  /// that has a string; the tree node will point at this token. CommonToken
  /// has indexes into a char stream and hence cannot be used to introduce
  /// new strings.
  /// </summary>
  IClassicToken = interface(IToken)
    { Property accessors }
    function GetTokenType: Integer;
    procedure SetTokenType(const Value: Integer);
    function GetLine: Integer;
    procedure SetLine(const Value: Integer);
    function GetCharPositionInLine: Integer;
    procedure SetCharPositionInLine(const Value: Integer);
    function GetChannel: Integer;
    procedure SetChannel(const Value: Integer);
    function GetTokenIndex: Integer;
    procedure SetTokenIndex(const Value: Integer);
    function GetText: String;
    procedure SetText(const Value: String);
    function GetInputStream: ICharStream;
    procedure SetInputStream(const Value: ICharStream);

    { Properties }
    property TokenType: Integer read GetTokenType write SetTokenType;
    property Line: Integer read GetLine write SetLine;
    property CharPositionInLine: Integer read GetCharPositionInLine write SetCharPositionInLine;
    property Channel: Integer read GetChannel write SetChannel;
    property TokenIndex: Integer read GetTokenIndex write SetTokenIndex;
    property Text: String read GetText write SetText;
    property InputStream: ICharStream read GetInputStream write SetInputStream;
  end;

  /// <summary>
  /// A generic recognizer that can handle recognizers generated from
  /// lexer, parser, and tree grammars. This is all the parsing
  /// support code essentially; most of it is error recovery stuff and
  /// backtracking.
  /// </summary>
  IBaseRecognizer = interface(IANTLRObject)
  ['{90813CE2-614B-4773-A26E-936E7DE7E9E9}']
    { Property accessors }
    function GetInput: IIntStream;
    function GetBacktrackingLevel: Integer;
    function GetState: IRecognizerSharedState;
    function GetNumberOfSyntaxErrors: Integer;
    function GetGrammarFileName: String;
    function GetSourceName: String;
    function GetTokenNames: TStringArray;

    { Methods }
    procedure BeginBacktrack(const Level: Integer);
    procedure EndBacktrack(const Level: Integer; const Successful: Boolean);

    /// <summary>Reset the parser's state. Subclasses must rewind the input stream.</summary>
    procedure Reset;

    /// <summary>
    /// Match current input symbol against ttype. Attempt
    /// single token insertion or deletion error recovery. If
    /// that fails, throw EMismatchedTokenException.
    /// </summary>
    /// <remarks>
    /// To turn off single token insertion or deletion error
    /// recovery, override MismatchRecover() and have it call
    /// plain Mismatch(), which does not recover. Then any error
    /// in a rule will cause an exception and immediate exit from
    /// rule. Rule would recover by resynchronizing to the set of
    /// symbols that can follow rule ref.
    /// </remarks>
    function Match(const Input: IIntStream; const TokenType: Integer;
      const Follow: IBitSet): IANTLRInterface;

    function MismatchIsUnwantedToken(const Input: IIntStream;
      const TokenType: Integer): Boolean;

    function MismatchIsMissingToken(const Input: IIntStream;
      const Follow: IBitSet): Boolean;

    /// <summary>A hook to listen in on the token consumption during error recovery.
    /// The DebugParser subclasses this to fire events to the listener.
    /// </summary>
    procedure BeginResync;
    procedure EndResync;

    /// <summary>
    /// Report a recognition problem.
    /// </summary>
    /// <remarks>
    /// This method sets errorRecovery to indicate the parser is recovering
    /// not parsing. Once in recovery mode, no errors are generated.
    /// To get out of recovery mode, the parser must successfully Match
    /// a token (after a resync). So it will go:
    ///
    /// 1. error occurs
    /// 2. enter recovery mode, report error
    /// 3. consume until token found in resynch set
    /// 4. try to resume parsing
    /// 5. next Match() will reset errorRecovery mode
    ///
    /// If you override, make sure to update syntaxErrors if you care about that.
    /// </remarks>
    procedure ReportError(const E: ERecognitionException);

    /// <summary> Match the wildcard: in a symbol</summary>
    procedure MatchAny(const Input: IIntStream);

    procedure DisplayRecognitionError(const TokenNames: TStringArray;
      const E: ERecognitionException);

    /// <summary>
    /// What error message should be generated for the various exception types?
    ///
    /// Not very object-oriented code, but I like having all error message generation
    /// within one method rather than spread among all of the exception classes. This
    /// also makes it much easier for the exception handling because the exception
    /// classes do not have to have pointers back to this object to access utility
    /// routines and so on. Also, changing the message for an exception type would be
    /// difficult because you would have to subclass the exception, but then somehow get
    /// ANTLR to make those kinds of exception objects instead of the default.
    ///
    /// This looks weird, but trust me--it makes the most sense in terms of flexibility.
    ///
    /// For grammar debugging, you will want to override this to add more information
    /// such as the stack frame with GetRuleInvocationStack(e, this.GetType().Fullname)
    /// and, for no viable alts, the decision description and state etc...
    ///
    /// Override this to change the message generated for one or more exception types.
    /// </summary>
    function GetErrorMessage(const E: ERecognitionException;
      const TokenNames: TStringArray): String;

    /// <summary>
    /// What is the error header, normally line/character position information?
    /// </summary>
    function GetErrorHeader(const E: ERecognitionException): String;

    /// <summary>
    /// How should a token be displayed in an error message? The default
    /// is to display just the text, but during development you might
    /// want to have a lot of information spit out. Override in that case
    /// to use t.ToString() (which, for CommonToken, dumps everything about
    /// the token). This is better than forcing you to override a method in
    /// your token objects because you don't have to go modify your lexer
    /// so that it creates a new type.
    /// </summary>
    function GetTokenErrorDisplay(const T: IToken): String;

    /// <summary>
    /// Override this method to change where error messages go
    /// </summary>
    procedure EmitErrorMessage(const Msg: String);

    /// <summary>
    /// Recover from an error found on the input stream. This is
    /// for NoViableAlt and mismatched symbol exceptions. If you enable
    /// single token insertion and deletion, this will usually not
    /// handle mismatched symbol exceptions but there could be a mismatched
    /// token that the Match() routine could not recover from.
    /// </summary>
    procedure Recover(const Input: IIntStream; const RE: ERecognitionException);

    // Not currently used
    function RecoverFromMismatchedSet(const Input: IIntStream;
      const E: ERecognitionException; const Follow: IBitSet): IANTLRInterface;

    procedure ConsumeUntil(const Input: IIntStream; const TokenType: Integer); overload;

    /// <summary>Consume tokens until one matches the given token set </summary>
    procedure ConsumeUntil(const Input: IIntStream; const BitSet: IBitSet); overload;

    /// <summary>
    /// Returns List <String> of the rules in your parser instance
    /// leading up to a call to this method. You could override if
    /// you want more details such as the file/line info of where
    /// in the parser source code a rule is invoked.
    /// </summary>
    /// <remarks>
    /// NOT IMPLEMENTED IN THE DELPHI VERSION YET
    /// This is very useful for error messages and for context-sensitive
    /// error recovery.
    /// </remarks>
    //function GetRuleInvocationStack: IList<IANTLRInterface>; overload;

    /// <summary>
    /// A more general version of GetRuleInvocationStack where you can
    /// pass in, for example, a RecognitionException to get its rule
    /// stack trace. This routine is shared with all recognizers, hence,
    /// static.
    ///
    /// TODO: move to a utility class or something; weird having lexer call this
    /// </summary>
    /// <remarks>
    /// NOT IMPLEMENTED IN THE DELPHI VERSION YET
    /// </remarks>
    //function GetRuleInvocationStack(const E: Exception;
    //  const RecognizerClassName: String): IList<IANTLRInterface>; overload;

    /// <summary>A convenience method for use most often with template rewrites.
    /// Convert a List<Token> to List<String>
    /// </summary>
    function ToStrings(const Tokens: IList<IToken>): IList<String>;

    /// <summary>
    /// Given a rule number and a start token index number, return
    /// MEMO_RULE_UNKNOWN if the rule has not parsed input starting from
    /// start index. If this rule has parsed input starting from the
    /// start index before, then return where the rule stopped parsing.
    /// It returns the index of the last token matched by the rule.
    /// </summary>
    /// <remarks>
    /// For now we use a hashtable and just the slow Object-based one.
    /// Later, we can make a special one for ints and also one that
    /// tosses out data after we commit past input position i.
    /// </remarks>
    function GetRuleMemoization(const RuleIndex, RuleStartIndex: Integer): Integer;

    /// <summary>
    /// Has this rule already parsed input at the current index in the
    /// input stream? Return the stop token index or MEMO_RULE_UNKNOWN.
    /// If we attempted but failed to parse properly before, return
    /// MEMO_RULE_FAILED.
    ///
    /// This method has a side-effect: if we have seen this input for
    /// this rule and successfully parsed before, then seek ahead to
    /// 1 past the stop token matched for this rule last time.
    /// </summary>
    function AlreadyParsedRule(const Input: IIntStream;
      const RuleIndex: Integer): Boolean;

    /// <summary>
    /// Record whether or not this rule parsed the input at this position
    /// successfully. Use a standard hashtable for now.
    /// </summary>
    procedure Memoize(const Input: IIntStream; const RuleIndex,
      RuleStartIndex: Integer);

    /// <summary>
    /// Return how many rule/input-index pairs there are in total.
    /// TODO: this includes synpreds. :(
    /// </summary>
    /// <returns></returns>
    function GetRuleMemoizationChaceSize: Integer;

    procedure TraceIn(const RuleName: String; const RuleIndex: Integer;
      const InputSymbol: String);
    procedure TraceOut(const RuleName: String; const RuleIndex: Integer;
      const InputSymbol: String);

    { Properties }
    property Input: IIntStream read GetInput;
    property BacktrackingLevel: Integer read GetBacktrackingLevel;
    property State: IRecognizerSharedState read GetState;

    /// <summary>
    /// Get number of recognition errors (lexer, parser, tree parser). Each
    /// recognizer tracks its own number. So parser and lexer each have
    /// separate count. Does not count the spurious errors found between
    /// an error and next valid token match
    ///
    /// See also ReportError()
    /// </summary>
    property NumberOfSyntaxErrors: Integer read GetNumberOfSyntaxErrors;

    /// <summary>
    /// For debugging and other purposes, might want the grammar name.
    /// Have ANTLR generate an implementation for this property.
    /// </summary>
    /// <returns></returns>
    property GrammarFileName: String read GetGrammarFileName;

    /// <summary>
    /// For debugging and other purposes, might want the source name.
    /// Have ANTLR provide a hook for this property.
    /// </summary>
    /// <returns>The source name</returns>
    property SourceName: String read GetSourceName;

    /// <summary>
    /// Used to print out token names like ID during debugging and
    /// error reporting. The generated parsers implement a method
    /// that overrides this to point to their string[] tokenNames.
    /// </summary>
    property TokenNames: TStringArray read GetTokenNames;
  end;

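  { Example (sketch): rerouting recognizer diagnostics. In a generated parser
    (a class that ultimately implements IBaseRecognizer), overriding
    EmitErrorMessage is usually sufficient; TMyParser and FErrorLog are
    hypothetical names.

      procedure TMyParser.EmitErrorMessage(const Msg: String);
      begin
        FErrorLog.Add(Msg);   // collect messages instead of writing them out
      end; }
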
  /// <summary>
  /// The most common stream of tokens is one where every token is buffered up
  /// and tokens are prefiltered for a certain channel (the parser will only
  /// see these tokens and cannot change the filter channel number during the
  /// parse).
  ///
  /// TODO: how to access the full token stream? How to track all tokens matched per rule?
  /// </summary>
  ICommonTokenStream = interface(ITokenStream)
    { Methods }

    /// <summary>
    /// A simple filter mechanism whereby you can tell this token stream
    /// to force all tokens of type TType to be on Channel.
    /// </summary>
    ///
    /// <remarks>
    /// For example,
    /// when interpreting, we cannot exec actions so we need to tell
    /// the stream to force all WS and NEWLINE to be a different, ignored
    /// channel.
    /// </remarks>
    procedure SetTokenTypeChannel(const TType, Channel: Integer);

    procedure DiscardTokenType(const TType: Integer);

    procedure DiscardOffChannelTokens(const Discard: Boolean);

    function GetTokens: IList<IToken>; overload;
    function GetTokens(const Start, Stop: Integer): IList<IToken>; overload;

    /// <summary>Given a start and stop index, return a List of all tokens in
    /// the token type BitSet. Return null if no tokens were found. This
    /// method looks at both on and off channel tokens.
    /// </summary>
    function GetTokens(const Start, Stop: Integer;
      const Types: IBitSet): IList<IToken>; overload;

    function GetTokens(const Start, Stop: Integer;
      const Types: IList<Integer>): IList<IToken>; overload;

    function GetTokens(const Start, Stop,
      TokenType: Integer): IList<IToken>; overload;

    procedure Reset;
  end;

  IDFA = interface;

  TSpecialStateTransitionHandler = function(const DFA: IDFA; S: Integer;
    const Input: IIntStream): Integer of Object;

  /// <summary>
  /// A DFA implemented as a set of transition tables.
  /// </summary>
  /// <remarks>
  /// <para>
  /// Any state that has a semantic predicate edge is special; those states are
  /// generated with if-then-else structures in a SpecialStateTransition()
  /// which is generated by cyclicDFA template.
  /// </para>
  /// <para>
  /// There are at most 32767 states (16-bit signed short). Could get away with byte
  /// sometimes but would have to generate different types and the simulation code too.
  /// </para>
  /// <para>
  /// As a point of reference, the Tokens rule DFA for the lexer in the Java grammar
  /// sample has approximately 326 states.
  /// </para>
  /// </remarks>
  IDFA = interface(IANTLRInterface)
  ['{36312B59-B718-48EF-A0EC-4529DE70F4C2}']
    { Property accessors }
    function GetSpecialStateTransitionHandler: TSpecialStateTransitionHandler;
    procedure SetSpecialStateTransitionHandler(const Value: TSpecialStateTransitionHandler);

    { Methods }

    /// <summary>
    /// From the input stream, predict what alternative will succeed using this
    /// DFA (representing the covering regular approximation to the underlying CFL).
    /// </summary>
    /// <param name="Input">Input stream</param>
    /// <returns>Return an alternative number 1..N. Throw an exception upon error.</returns>
    function Predict(const Input: IIntStream): Integer;

    /// <summary>
    /// A hook for debugging interface
    /// </summary>
    /// <param name="NVAE"></param>
    procedure Error(const NVAE: ENoViableAltException);

    function SpecialStateTransition(const S: Integer; const Input: IIntStream): Integer;

    function Description: String;

    function SpecialTransition(const State, Symbol: Integer): Integer;

    { Properties }
    property SpecialStateTransitionHandler: TSpecialStateTransitionHandler read GetSpecialStateTransitionHandler write SetSpecialStateTransitionHandler;
  end;

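  { Example (sketch): how generated code typically consults a cyclic DFA at a
    decision point. FDFA7 stands for a DFA field a generated recognizer would
    create; the case labels dispatch to hypothetical helpers for each
    alternative (1..N).

      var
        Alt: Integer;
      begin
        Alt := FDFA7.Predict(Input);   // may raise ENoViableAltException
        case Alt of
          1: ParseAlternativeOne;
          2: ParseAlternativeTwo;
        end;
      end; }
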
  /// <summary>
  /// A lexer is a recognizer that draws input symbols from a character stream.
  /// Lexer grammars result in a subclass of this object. A Lexer object
  /// uses simplified Match() and error recovery mechanisms in the interest
  /// of speed.
  /// </summary>
  ILexer = interface(IBaseRecognizer)
  ['{331AAB49-E7CD-40E7-AEF5-427F7D6577AD}']
    { Property accessors }
    function GetCharStream: ICharStream;
    procedure SetCharStream(const Value: ICharStream);
    function GetLine: Integer;
    function GetCharPositionInLine: Integer;
    function GetCharIndex: Integer;
    function GetText: String;
    procedure SetText(const Value: String);

    { Methods }

    /// <summary>
    /// Return a token from this source; i.e., Match a token on the char stream.
    /// </summary>
    function NextToken: IToken;

    /// <summary>
    /// Instruct the lexer to skip creating a token for current lexer rule and
    /// look for another token. NextToken() knows to keep looking when a lexer
    /// rule finishes with token set to SKIP_TOKEN. Recall that if token==null
    /// at end of any token rule, it creates one for you and emits it.
    /// </summary>
    procedure Skip;

    /// <summary>This is the lexer entry point that sets instance var 'token' </summary>
    procedure DoTokens;

    /// <summary>
    /// Currently does not support multiple emits per nextToken invocation
    /// for efficiency reasons. Subclass and override this method and
    /// NextToken (to push tokens into a list and pull from that list rather
    /// than a single variable as this implementation does).
    /// </summary>
    procedure Emit(const Token: IToken); overload;

    /// <summary>
    /// The standard method called to automatically emit a token at the
    /// outermost lexical rule. The token object should point into the
    /// char buffer start..stop. If there is a text override in 'text',
    /// use that to set the token's text.
    /// </summary>
    /// <remarks><para>Override this method to emit custom Token objects.</para>
    /// <para>If you are building trees, then you should also override
    /// Parser or TreeParser.GetMissingSymbol().</para>
    ///</remarks>
    function Emit: IToken; overload;

    procedure Match(const S: String); overload;
    procedure Match(const C: Integer); overload;
    procedure MatchAny;
    procedure MatchRange(const A, B: Integer);

    /// <summary>
    /// A lexer can normally Match any char in its vocabulary after matching
    /// a token, so do the easy thing and just kill a character and hope
    /// it all works out. You can instead use the rule invocation stack
    /// to do sophisticated error recovery if you are in a Fragment rule.
    /// </summary>
    procedure Recover(const RE: ERecognitionException);

    function GetCharErrorDisplay(const C: Integer): String;

    procedure TraceIn(const RuleName: String; const RuleIndex: Integer);
    procedure TraceOut(const RuleName: String; const RuleIndex: Integer);

    { Properties }

    /// <summary>Set the char stream and reset the lexer </summary>
    property CharStream: ICharStream read GetCharStream write SetCharStream;
    property Line: Integer read GetLine;
    property CharPositionInLine: Integer read GetCharPositionInLine;

    /// <summary>What is the index of the current character of lookahead? </summary>
    property CharIndex: Integer read GetCharIndex;

    /// <summary>
    /// Gets or sets the 'lexeme' for the current token.
    /// </summary>
    /// <remarks>
    /// <para>
    /// The getter returns the text matched so far for the current token or any
    /// text override.
    /// </para>
    /// <para>
    /// The setter sets the complete text of this token. It overrides/wipes any
    /// previous changes to the text.
    /// </para>
    /// </remarks>
    property Text: String read GetText write SetText;
  end;

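  { Example (sketch): the kind of override the Emit/NextToken remarks above
    suggest for emitting more than one token per lexer rule. TMyLexer and its
    FPending queue are hypothetical; the base class is assumed to provide the
    single-token behaviour described by ILexer.

      procedure TMyLexer.Emit(const Token: IToken);
      begin
        FPending.Enqueue(Token);    // buffer every emitted token
        inherited Emit(Token);
      end;

      function TMyLexer.NextToken: IToken;
      begin
        if FPending.Count > 0 then
          Exit(FPending.Dequeue);   // drain buffered tokens first
        Result := inherited NextToken;
      end; }
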
  /// <summary>A parser for TokenStreams. Parser grammars result in a subclass
  /// of this.
  /// </summary>
  IParser = interface(IBaseRecognizer)
  ['{7420879A-5D1F-43CA-BD49-2264D7514501}']
    { Property accessors }
    function GetTokenStream: ITokenStream;
    procedure SetTokenStream(const Value: ITokenStream);

    { Methods }
    procedure TraceIn(const RuleName: String; const RuleIndex: Integer);
    procedure TraceOut(const RuleName: String; const RuleIndex: Integer);

    { Properties }

    /// <summary>Set the token stream and reset the parser </summary>
    property TokenStream: ITokenStream read GetTokenStream write SetTokenStream;
  end;

  /// <summary>
  /// Rules can return start/stop info as well as possible trees and templates
  /// </summary>
  IRuleReturnScope = interface(IANTLRInterface)
  ['{E9870056-BF6D-4CB2-B71C-10B80797C0B4}']
    { Property accessors }
    function GetStart: IANTLRInterface;
    procedure SetStart(const Value: IANTLRInterface);
    function GetStop: IANTLRInterface;
    procedure SetStop(const Value: IANTLRInterface);
    function GetTree: IANTLRInterface;
    procedure SetTree(const Value: IANTLRInterface);
    function GetTemplate: IANTLRInterface;

    { Properties }

    /// <summary>Return the start token or tree </summary>
    property Start: IANTLRInterface read GetStart write SetStart;

    /// <summary>Return the stop token or tree </summary>
    property Stop: IANTLRInterface read GetStop write SetStop;

    /// <summary>Has a value potentially if output=AST; </summary>
    property Tree: IANTLRInterface read GetTree write SetTree;

    /// <summary>
    /// Has a value potentially if output=template;
    /// Don't use StringTemplate type to avoid dependency on ST assembly
    /// </summary>
    property Template: IANTLRInterface read GetTemplate;
  end;

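  { Example (sketch): reading the span matched by a rule from its return
    scope. Parser, Tokens and StartRule are hypothetical stand-ins for a
    generated parser, its token stream and one of its rule methods.

      var
        Ret: IRuleReturnScope;
      begin
        Ret := Parser.StartRule;   // invoke a generated rule
        WriteLn(Tokens.ToString(Ret.Start as IToken, Ret.Stop as IToken));
      end; }
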
  /// <summary>
  /// Rules that return more than a single value must return an object
  /// containing all the values. Besides the properties defined in
  /// RuleLabelScope.PredefinedRulePropertiesScope there may be user-defined
  /// return values. This class simply defines the minimum properties that
  /// are always defined and methods to access the others that might be
  /// available depending on output option such as template and tree.
  ///
  /// Note text is not an actual property of the return value, it is computed
  /// from start and stop using the input stream's ToString() method. I
  /// could add a ctor to this so that we can pass in and store the input
  /// stream, but I'm not sure we want to do that. It would seem to be undefined
  /// to get the .text property anyway if the rule matches tokens from multiple
  /// input streams.
  ///
  /// I do not use getters for fields of objects that are used simply to
  /// group values such as this aggregate.
  /// </summary>
  IParserRuleReturnScope = interface(IRuleReturnScope)
  ['{9FB62050-E23B-4FE4-87D5-2C1EE67AEC3E}']
  end;

  /// <summary>Useful for dumping out the input stream after doing some
  /// augmentation or other manipulations.
  /// </summary>
  ///
  /// <remarks>
  /// You can insert stuff, Replace, and delete chunks. Note that the
  /// operations are done lazily--only if you convert the buffer to a
  /// String. This is very efficient because you are not moving data around
  /// all the time. As the buffer of tokens is converted to strings, the
  /// ToString() method(s) check to see if there is an operation at the
  /// current index. If so, the operation is done and then normal String
  /// rendering continues on the buffer. This is like having multiple Turing
  /// machine instruction streams (programs) operating on a single input tape. :)
  ///
  /// Since the operations are done lazily at ToString-time, operations do not
  /// screw up the token index values. That is, an insert operation at token
  /// index I does not change the index values for tokens I+1..N-1.
  ///
  /// Because operations never actually alter the buffer, you may always get
  /// the original token stream back without undoing anything. Since
  /// the instructions are queued up, you can easily simulate transactions and
  /// roll back any changes if there is an error just by removing instructions.
  /// For example,
  ///
  ///   var
  ///     Input: ICharStream;
  ///     Lex: ILexer;
  ///     Tokens: ITokenRewriteStream;
  ///     Parser: IParser;
  ///   Input := TANTLRFileStream.Create('input');
  ///   Lex := TLexer.Create(Input);
  ///   Tokens := TTokenRewriteStream.Create(Lex);
  ///   Parser := TParser.Create(tokens);
  ///   Parser.startRule();
  ///
  /// Then in the rules, you can execute
  ///   var
  ///     t,u: IToken;
  ///   ...
  ///   Input.InsertAfter(t, 'text to put after t');
  ///   Input.InsertAfter(u, 'text after u');
  ///   WriteLn(Tokens.ToString());
  ///
  /// Actually, you have to cast the 'input' to a TokenRewriteStream. :(
  ///
  /// You can also have multiple "instruction streams" and get multiple
  /// rewrites from a single pass over the input. Just name the instruction
  /// streams and use that name again when printing the buffer. This could be
  /// useful for generating a C file and also its header file--all from the
  /// same buffer:
  ///
  ///   Tokens.InsertAfter('pass1', t, 'text to put after t');
  ///   Tokens.InsertAfter('pass2', u, 'text after u');
  ///   WriteLn(Tokens.ToString('pass1'));
  ///   WriteLn(Tokens.ToString('pass2'));
  ///
  /// If you don't use named rewrite streams, a "default" stream is used as
  /// the first example shows.
  /// </remarks>
  ITokenRewriteStream = interface(ICommonTokenStream)
  ['{7B49CBB6-9395-4781-B616-F201889EEA13}']
    { Methods }
    procedure Rollback(const InstructionIndex: Integer); overload;

    /// <summary>Rollback the instruction stream for a program so that
    /// the indicated instruction (via instructionIndex) is no
    /// longer in the stream. UNTESTED!
    /// </summary>
    procedure Rollback(const ProgramName: String;
      const InstructionIndex: Integer); overload;

    procedure DeleteProgram; overload;

    /// <summary>Reset the program so that no instructions exist </summary>
    procedure DeleteProgram(const ProgramName: String); overload;

    procedure InsertAfter(const T: IToken; const Text: IANTLRInterface); overload;
    procedure InsertAfter(const Index: Integer; const Text: IANTLRInterface); overload;
    procedure InsertAfter(const ProgramName: String; const T: IToken;
      const Text: IANTLRInterface); overload;
    procedure InsertAfter(const ProgramName: String; const Index: Integer;
      const Text: IANTLRInterface); overload;
    procedure InsertAfter(const T: IToken; const Text: String); overload;
    procedure InsertAfter(const Index: Integer; const Text: String); overload;
    procedure InsertAfter(const ProgramName: String; const T: IToken;
      const Text: String); overload;
    procedure InsertAfter(const ProgramName: String; const Index: Integer;
      const Text: String); overload;

    procedure InsertBefore(const T: IToken; const Text: IANTLRInterface); overload;
    procedure InsertBefore(const Index: Integer; const Text: IANTLRInterface); overload;
    procedure InsertBefore(const ProgramName: String; const T: IToken;
      const Text: IANTLRInterface); overload;
    procedure InsertBefore(const ProgramName: String; const Index: Integer;
      const Text: IANTLRInterface); overload;
    procedure InsertBefore(const T: IToken; const Text: String); overload;
    procedure InsertBefore(const Index: Integer; const Text: String); overload;
    procedure InsertBefore(const ProgramName: String; const T: IToken;
      const Text: String); overload;
    procedure InsertBefore(const ProgramName: String; const Index: Integer;
      const Text: String); overload;

    procedure Replace(const Index: Integer; const Text: IANTLRInterface); overload;
    procedure Replace(const Start, Stop: Integer; const Text: IANTLRInterface); overload;
    procedure Replace(const IndexT: IToken; const Text: IANTLRInterface); overload;
    procedure Replace(const Start, Stop: IToken; const Text: IANTLRInterface); overload;
    procedure Replace(const ProgramName: String; const Start, Stop: Integer;
      const Text: IANTLRInterface); overload;
    procedure Replace(const ProgramName: String; const Start, Stop: IToken;
      const Text: IANTLRInterface); overload;
    procedure Replace(const Index: Integer; const Text: String); overload;
    procedure Replace(const Start, Stop: Integer; const Text: String); overload;
    procedure Replace(const IndexT: IToken; const Text: String); overload;
    procedure Replace(const Start, Stop: IToken; const Text: String); overload;
    procedure Replace(const ProgramName: String; const Start, Stop: Integer;
      const Text: String); overload;
    procedure Replace(const ProgramName: String; const Start, Stop: IToken;
      const Text: String); overload;

    procedure Delete(const Index: Integer); overload;
    procedure Delete(const Start, Stop: Integer); overload;
    procedure Delete(const IndexT: IToken); overload;
    procedure Delete(const Start, Stop: IToken); overload;
    procedure Delete(const ProgramName: String; const Start, Stop: Integer); overload;
    procedure Delete(const ProgramName: String; const Start, Stop: IToken); overload;

    function GetLastRewriteTokenIndex: Integer;

    function ToOriginalString: String; overload;
overload;
1340 function ToOriginalString(const Start, Stop: Integer): String; overload;
1341
1342 function ToString(const ProgramName: String): String; overload;
1343 function ToString(const ProgramName: String;
1344 const Start, Stop: Integer): String; overload;
1345
1346 function ToDebugString: String; overload;
1347 function ToDebugString(const Start, Stop: Integer): String; overload;
1348 end;
1349
1350 /// <summary>The root of the ANTLR exception hierarchy.</summary>
1351 /// <remarks>
1352 /// To avoid English-only error messages and to generally make things
1353 /// as flexible as possible, these exceptions are not created with strings,
1354 /// but rather with the information necessary to generate an error. Then
1355 /// the various reporting methods in Parser and Lexer can be overridden
1356 /// to generate a localized error message. For example, MismatchedToken
1357 /// exceptions are built with the expected token type.
1358 /// So, don't expect the Message property to contain anything useful.
1359 ///
1360 /// You can access the stack trace, which means that you can compute the
1361 /// complete trace of rules from the start symbol. This gives you considerable
1362 /// context information with which to generate useful error messages.
1363 ///
1364 /// ANTLR generates code that throws exceptions upon recognition error and
1365 /// also generates code to catch these exceptions in each rule. If you
1366 /// want to quit upon the first error, you can turn off the automatic error
1367 /// handling mechanism using a rulecatch action, but you still need to
1368 /// override the Mismatch and RecoverFromMismatchedSet methods.
1369 ///
1370 /// In general, the recognition exceptions can track where in a grammar a
1371 /// problem occurred and/or what was the expected input. While the parser
1372 /// knows its state (such as current input symbol and line info), that
1373 /// state can change before the exception is reported, so the current token index
1374 /// is computed and stored at exception time. From this info, you can
1375 /// perhaps print an entire line of input, not just a single token, for example.
1376 /// Better to just say the recognizer had a problem and then let the parser
1377 /// figure out a fancy report.
1378 /// </remarks>
1379 ERecognitionException = class(Exception)
1380 strict private
1381 FApproximateLineInfo: Boolean;
1382 strict protected
1383 /// <summary>What input stream did the error occur in? </summary>
1384 FInput: IIntStream;
1385
1386 /// <summary>
1387 /// What is the index of the token/char we were looking at when the error occurred?
1388 /// </summary>
1389 FIndex: Integer;
1390
1391 /// <summary>
1392 /// The current Token when an error occurred. Since not all streams
1393 /// can retrieve the ith Token, we have to track the Token object.
1394 /// </summary>
1395 FToken: IToken;
1396
1397 /// <summary>[Tree parser] Node with the problem.</summary>
1398 FNode: IANTLRInterface;
1399
1400 /// <summary>The current char when an error occurred. For lexers. </summary>
1401 FC: Integer;
1402
1403 /// <summary>Track the line at which the error occurred in case this is
1404 /// generated from a lexer. We need to track this since the
1405 /// unexpected char doesn't carry the line info.
1406 /// </summary>
1407 FLine: Integer;
1408 FCharPositionInLine: Integer;
1409 strict protected
1410 procedure ExtractInformationFromTreeNodeStream(const Input: IIntStream);
1411 function GetUnexpectedType: Integer; virtual;
1412 public
1413 /// <summary>Used for remote debugger deserialization </summary>
1414 constructor Create; overload;
1415 constructor Create(const AMessage: String); overload;
1416 constructor Create(const AInput: IIntStream); overload;
1417 constructor Create(const AMessage: String; const AInput: IIntStream); overload;
1418
1419 /// <summary>
1420 /// If you are parsing a tree node stream, you will encounter some
1421 /// imaginary nodes w/o line/col info. We now search backwards looking
1422 /// for the most recent token with line/col info, but notify GetErrorHeader()
1423 /// that the info is approximate.
1424 /// </summary>
1425 property ApproximateLineInfo: Boolean read FApproximateLineInfo write FApproximateLineInfo;
1426
1427 /// <summary>
1428 /// Returns the current Token when the error occurred (for parsers,
1429 /// although a tree parser might also set the token)
1430 /// </summary>
1431 property Token: IToken read FToken write FToken;
1432
1433 /// <summary>
1434 /// Returns the [tree parser] node where the error occurred (for tree parsers).
1435 /// </summary>
1436 property Node: IANTLRInterface read FNode write FNode;
1437
1438 /// <summary>
1439 /// Returns the line at which the error occurred (for lexers)
1440 /// </summary>
1441 property Line: Integer read FLine write FLine;
1442
1443 /// <summary>
1444 /// Returns the character position in the line when the error
1445 /// occurred (for lexers)
1446 /// </summary>
1447 property CharPositionInLine: Integer read FCharPositionInLine write FCharPositionInLine;
1448
1449 /// <summary>Returns the input stream in which the error occurred</summary>
1450 property Input: IIntStream read FInput write FInput;
1451
1452 /// <summary>
1453 /// Returns the token type or char of the unexpected input element
1454 /// </summary>
1455 property UnexpectedType: Integer read GetUnexpectedType;
1456
1457 /// <summary>
1458 /// Returns the current char when the error occurred (for lexers)
1459 /// </summary>
1460 property Character: Integer read FC write FC;
1461
1462 /// <summary>
1463 /// Returns the token/char index in the stream when the error occurred
1464 /// </summary>
1465 property Index: Integer read FIndex write FIndex;
1466 end;
1467
1468 /// <summary>
1469 /// A mismatched char or Token or tree node.
1470 /// </summary>
1471 EMismatchedTokenException = class(ERecognitionException)
1472 strict private
1473 FExpecting: Integer;
1474 public
1475 constructor Create(const AExpecting: Integer; const AInput: IIntStream);
1476
1477 function ToString: String; override;
1478
1479 property Expecting: Integer read FExpecting write FExpecting;
1480 end;
1481
1482 EUnwantedTokenException = class(EMismatchedTokenException)
1483 strict private
1484 function GetUnexpectedToken: IToken;
1485 public
1486 property UnexpectedToken: IToken read GetUnexpectedToken;
1487
1488 function ToString: String; override;
1489 end;
1490
1491 /// <summary>
1492 /// We were expecting a token but it was not found. The current token
1493 /// is actually what we wanted next. Used for tree node errors too.
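///
/// A hedged reporting sketch for this exception, e.g. inside an overridden
/// TBaseRecognizer.GetErrorMessage (E and TokenNames are its parameters;
/// the message text is illustrative):
///
///   if E is EMissingTokenException then
///     Result := 'missing ' + TokenNames[EMissingTokenException(E).MissingType]
///       + ' at ' + IntToStr(E.Line) + ':' + IntToStr(E.CharPositionInLine);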
1494 /// </summary> 1495 EMissingTokenException = class(EMismatchedTokenException) 1496 strict private 1497 FInserted: IANTLRInterface; 1498 function GetMissingType: Integer; 1499 public 1500 constructor Create(const AExpecting: Integer; const AInput: IIntStream; 1501 const AInserted: IANTLRInterface); 1502 1503 function ToString: String; override; 1504 1505 property MissingType: Integer read GetMissingType; 1506 property Inserted: IANTLRInterface read FInserted write FInserted; 1507 end; 1508 1509 EMismatchedTreeNodeException = class(ERecognitionException) 1510 strict private 1511 FExpecting: Integer; 1512 public 1513 constructor Create(const AExpecting: Integer; const AInput: IIntStream); 1514 1515 function ToString: String; override; 1516 1517 property Expecting: Integer read FExpecting write FExpecting; 1518 end; 1519 1520 ENoViableAltException = class(ERecognitionException) 1521 strict private 1522 FGrammarDecisionDescription: String; 1523 FDecisionNumber: Integer; 1524 FStateNumber: Integer; 1525 public 1526 constructor Create(const AGrammarDecisionDescription: String; 1527 const ADecisionNumber, AStateNumber: Integer; const AInput: IIntStream); 1528 1529 function ToString: String; override; 1530 1531 property GrammarDecisionDescription: String read FGrammarDecisionDescription; 1532 property DecisionNumber: Integer read FDecisionNumber; 1533 property StateNumber: Integer read FStateNumber; 1534 end; 1535 1536 EEarlyExitException = class(ERecognitionException) 1537 strict private 1538 FDecisionNumber: Integer; 1539 public 1540 constructor Create(const ADecisionNumber: Integer; const AInput: IIntStream); 1541 1542 property DecisionNumber: Integer read FDecisionNumber; 1543 end; 1544 1545 EMismatchedSetException = class(ERecognitionException) 1546 strict private 1547 FExpecting: IBitSet; 1548 public 1549 constructor Create(const AExpecting: IBitSet; const AInput: IIntStream); 1550 1551 function ToString: String; override; 1552 1553 property Expecting: IBitSet read FExpecting write FExpecting; 1554 end; 1555 1556 EMismatchedNotSetException = class(EMismatchedSetException) 1557 1558 public 1559 function ToString: String; override; 1560 end; 1561 1562 EFailedPredicateException = class(ERecognitionException) 1563 strict private 1564 FRuleName: String; 1565 FPredicateText: String; 1566 public 1567 constructor Create(const AInput: IIntStream; const ARuleName, 1568 APredicateText: String); 1569 1570 function ToString: String; override; 1571 1572 property RuleName: String read FRuleName write FRuleName; 1573 property PredicateText: String read FPredicateText write FPredicateText; 1574 end; 1575 1576 EMismatchedRangeException = class(ERecognitionException) 1577 strict private 1578 FA: Integer; 1579 FB: Integer; 1580 public 1581 constructor Create(const AA, AB: Integer; const AInput: IIntStream); 1582 1583 function ToString: String; override; 1584 1585 property A: Integer read FA write FA; 1586 property B: Integer read FB write FB; 1587 end; 1588 1589 type 1590 TCharStreamState = class(TANTLRObject, ICharStreamState) 1591 strict private 1592 FP: Integer; 1593 FLine: Integer; 1594 FCharPositionInLine: Integer; 1595 protected 1596 { ICharStreamState } 1597 function GetP: Integer; 1598 procedure SetP(const Value: Integer); 1599 function GetLine: Integer; 1600 procedure SetLine(const Value: Integer); 1601 function GetCharPositionInLine: Integer; 1602 procedure SetCharPositionInLine(const Value: Integer); 1603 end; 1604 1605 type 1606 TANTLRStringStream = class(TANTLRObject, IANTLRStringStream, 
ICharStream) 1607 private 1608 FData: PChar; 1609 FOwnsData: Boolean; 1610 1611 /// <summary>How many characters are actually in the buffer?</summary> 1612 FN: Integer; 1613 1614 /// <summary>Current line number within the input (1..n )</summary> 1615 FLine: Integer; 1616 1617 /// <summary>Index in our array for the next char (0..n-1)</summary> 1618 FP: Integer; 1619 1620 /// <summary> 1621 /// The index of the character relative to the beginning of the 1622 /// line (0..n-1) 1623 /// </summary> 1624 FCharPositionInLine: Integer; 1625 1626 /// <summary> 1627 /// Tracks the depth of nested <see cref="IIntStream.Mark"/> calls 1628 /// </summary> 1629 FMarkDepth: Integer; 1630 1631 /// <summary> 1632 /// A list of CharStreamState objects that tracks the stream state 1633 /// (i.e. line, charPositionInLine, and p) that can change as you 1634 /// move through the input stream. Indexed from 1..markDepth. 1635 /// A null is kept @ index 0. Create upon first call to Mark(). 1636 /// </summary> 1637 FMarkers: IList<ICharStreamState>; 1638 1639 /// <summary> 1640 /// Track the last Mark() call result value for use in Rewind(). 1641 /// </summary> 1642 FLastMarker: Integer; 1643 /// <summary> 1644 /// What is name or source of this char stream? 1645 /// </summary> 1646 FName: String; 1647 protected 1648 { IIntStream } 1649 function GetSourceName: String; virtual; 1650 1651 procedure Consume; virtual; 1652 function LA(I: Integer): Integer; virtual; 1653 function LAChar(I: Integer): Char; 1654 function Index: Integer; 1655 function Size: Integer; 1656 function Mark: Integer; virtual; 1657 procedure Rewind(const Marker: Integer); overload; virtual; 1658 procedure Rewind; overload; virtual; 1659 procedure Release(const Marker: Integer); virtual; 1660 procedure Seek(const Index: Integer); virtual; 1661 1662 property SourceName: String read GetSourceName write FName; 1663 protected 1664 { ICharStream } 1665 function GetLine: Integer; virtual; 1666 procedure SetLine(const Value: Integer); virtual; 1667 function GetCharPositionInLine: Integer; virtual; 1668 procedure SetCharPositionInLine(const Value: Integer); virtual; 1669 function LT(const I: Integer): Integer; virtual; 1670 function Substring(const Start, Stop: Integer): String; virtual; 1671 protected 1672 { IANTLRStringStream } 1673 procedure Reset; virtual; 1674 public 1675 constructor Create; overload; 1676 1677 /// <summary> 1678 /// Initializes a new instance of the ANTLRStringStream class for the 1679 /// specified string. This copies data from the string to a local 1680 /// character array 1681 /// </summary> 1682 constructor Create(const AInput: String); overload; 1683 1684 /// <summary> 1685 /// Initializes a new instance of the ANTLRStringStream class for the 1686 /// specified character array. 
This is the preferred constructor as
1687 /// no data is copied
1688 /// </summary>
1689 constructor Create(const AData: PChar;
1690 const ANumberOfActualCharsInArray: Integer); overload;
1691
1692 destructor Destroy; override;
1693 end;
1694
1695 TANTLRFileStream = class(TANTLRStringStream, IANTLRFileStream)
1696 strict private
1697 /// <summary>Fully qualified name of the stream's underlying file</summary>
1698 FFileName: String;
1699 protected
1700 { IIntStream }
1701 function GetSourceName: String; override;
1702 protected
1703 { IANTLRFileStream }
1704
1705 procedure Load(const FileName: String; const Encoding: TEncoding); virtual;
1706 public
1707 /// <summary>
1708 /// Initializes a new instance of the ANTLRFileStream class for the
1709 /// specified file name
1710 /// </summary>
1711 constructor Create(const AFileName: String); overload;
1712
1713 /// <summary>
1714 /// Initializes a new instance of the ANTLRFileStream class for the
1715 /// specified file name and encoding
1716 /// </summary>
1717 constructor Create(const AFileName: String; const AEncoding: TEncoding); overload;
1718 end;
1719
1720 TBitSet = class(TANTLRObject, IBitSet, ICloneable)
1721 strict private
1722 const
1723 BITS = 64; // number of bits per UInt64 word
1724 LOG_BITS = 6; // 1 shl 6 = 64
1725
1726 ///<summary> We will often need to do a mod operation (i mod nbits).
1727 /// It turns out that, for powers of two, this mod operation is the
1728 /// same as <![CDATA[(I and (nbits-1))]]>. Since mod is slow, we use a precomputed
1729 /// mod mask to do the mod instead.
1730 /// </summary>
1731 MOD_MASK = BITS - 1;
1732 strict private
1733 /// <summary>The actual data bits </summary>
1734 FBits: TUInt64Array;
1735 strict private
1736 class function WordNumber(const Bit: Integer): Integer; static;
1737 class function BitMask(const BitNumber: Integer): UInt64; static;
1738 class function NumWordsToHold(const El: Integer): Integer; static;
1739 protected
1740 { ICloneable }
1741 function Clone: IANTLRInterface; virtual;
1742 protected
1743 { IBitSet }
1744 function GetIsNil: Boolean; virtual;
1745 function BitSetOr(const A: IBitSet): IBitSet; virtual;
1746 procedure Add(const El: Integer); virtual;
1747 procedure GrowToInclude(const Bit: Integer); virtual;
1748 procedure OrInPlace(const A: IBitSet); virtual;
1749 function Size: Integer; virtual;
1750 function Member(const El: Integer): Boolean; virtual;
1751 procedure Remove(const El: Integer); virtual;
1752 function NumBits: Integer; virtual;
1753 function LengthInLongWords: Integer; virtual;
1754 function ToArray: TIntegerArray; virtual;
1755 function ToPackedArray: TUInt64Array; virtual;
1756 function ToString(const TokenNames: TStringArray): String; reintroduce; overload; virtual;
1757 public
1758 /// <summary>Construct a bitset of size one word (64 bits) </summary>
1759 constructor Create; overload;
1760
1761 /// <summary>Construction from a static array of UInt64 values </summary>
1762 constructor Create(const ABits: array of UInt64); overload;
1763
1764 /// <summary>Construction from a list of integers </summary>
1765 constructor Create(const AItems: IList<Integer>); overload;
1766
1767 /// <summary>Construct a bitset given the size</summary>
1768 /// <param name="ANBits">The size of the bitset in bits</param>
1769 constructor Create(const ANBits: Integer); overload;
1770
1771 class function BitSetOf(const El: Integer): IBitSet; overload; static;
1772 class function BitSetOf(const A, B: Integer): IBitSet; overload; static;
1773 class function BitSetOf(const A, B, C: Integer):
IBitSet; overload; static; 1774 class function BitSetOf(const A, B, C, D: Integer): IBitSet; overload; static; 1775 1776 function ToString: String; overload; override; 1777 function Equals(Obj: TObject): Boolean; override; 1778 end; 1779 1780 TRecognizerSharedState = class(TANTLRObject, IRecognizerSharedState) 1781 strict private 1782 FFollowing: TBitSetArray; 1783 FFollowingStackPointer: Integer; 1784 FErrorRecovery: Boolean; 1785 FLastErrorIndex: Integer; 1786 FFailed: Boolean; 1787 FSyntaxErrors: Integer; 1788 FBacktracking: Integer; 1789 FRuleMemo: TDictionaryArray<Integer, Integer>; 1790 FToken: IToken; 1791 FTokenStartCharIndex: Integer; 1792 FTokenStartLine: Integer; 1793 FTokenStartCharPositionInLine: Integer; 1794 FChannel: Integer; 1795 FTokenType: Integer; 1796 FText: String; 1797 protected 1798 { IRecognizerSharedState } 1799 function GetFollowing: TBitSetArray; 1800 procedure SetFollowing(const Value: TBitSetArray); 1801 function GetFollowingStackPointer: Integer; 1802 procedure SetFollowingStackPointer(const Value: Integer); 1803 function GetErrorRecovery: Boolean; 1804 procedure SetErrorRecovery(const Value: Boolean); 1805 function GetLastErrorIndex: Integer; 1806 procedure SetLastErrorIndex(const Value: Integer); 1807 function GetFailed: Boolean; 1808 procedure SetFailed(const Value: Boolean); 1809 function GetSyntaxErrors: Integer; 1810 procedure SetSyntaxErrors(const Value: Integer); 1811 function GetBacktracking: Integer; 1812 procedure SetBacktracking(const Value: Integer); 1813 function GetRuleMemo: TDictionaryArray<Integer, Integer>; 1814 function GetRuleMemoCount: Integer; 1815 procedure SetRuleMemoCount(const Value: Integer); 1816 function GetToken: IToken; 1817 procedure SetToken(const Value: IToken); 1818 function GetTokenStartCharIndex: Integer; 1819 procedure SetTokenStartCharIndex(const Value: Integer); 1820 function GetTokenStartLine: Integer; 1821 procedure SetTokenStartLine(const Value: Integer); 1822 function GetTokenStartCharPositionInLine: Integer; 1823 procedure SetTokenStartCharPositionInLine(const Value: Integer); 1824 function GetChannel: Integer; 1825 procedure SetChannel(const Value: Integer); 1826 function GetTokenType: Integer; 1827 procedure SetTokenType(const Value: Integer); 1828 function GetText: String; 1829 procedure SetText(const Value: String); 1830 public 1831 constructor Create; 1832 end; 1833 1834 TCommonToken = class(TANTLRObject, ICommonToken, IToken) 1835 strict protected 1836 FTokenType: Integer; 1837 FLine: Integer; 1838 FCharPositionInLine: Integer; 1839 FChannel: Integer; 1840 FInput: ICharStream; 1841 1842 /// <summary>We need to be able to change the text once in a while. If 1843 /// this is non-null, then getText should return this. Note that 1844 /// start/stop are not affected by changing this. 
1845 /// </summary> 1846 FText: String; 1847 1848 /// <summary>What token number is this from 0..n-1 tokens; < 0 implies invalid index </summary> 1849 FIndex: Integer; 1850 1851 /// <summary>The char position into the input buffer where this token starts </summary> 1852 FStart: Integer; 1853 1854 /// <summary>The char position into the input buffer where this token stops </summary> 1855 FStop: Integer; 1856 protected 1857 { IToken } 1858 function GetTokenType: Integer; virtual; 1859 procedure SetTokenType(const Value: Integer); virtual; 1860 function GetLine: Integer; virtual; 1861 procedure SetLine(const Value: Integer); virtual; 1862 function GetCharPositionInLine: Integer; virtual; 1863 procedure SetCharPositionInLine(const Value: Integer); virtual; 1864 function GetChannel: Integer; virtual; 1865 procedure SetChannel(const Value: Integer); virtual; 1866 function GetTokenIndex: Integer; virtual; 1867 procedure SetTokenIndex(const Value: Integer); virtual; 1868 function GetText: String; virtual; 1869 procedure SetText(const Value: String); virtual; 1870 protected 1871 { ICommonToken } 1872 function GetStartIndex: Integer; 1873 procedure SetStartIndex(const Value: Integer); 1874 function GetStopIndex: Integer; 1875 procedure SetStopIndex(const Value: Integer); 1876 function GetInputStream: ICharStream; 1877 procedure SetInputStream(const Value: ICharStream); 1878 protected 1879 constructor Create; overload; 1880 public 1881 constructor Create(const ATokenType: Integer); overload; 1882 constructor Create(const AInput: ICharStream; const ATokenType, AChannel, 1883 AStart, AStop: Integer); overload; 1884 constructor Create(const ATokenType: Integer; const AText: String); overload; 1885 constructor Create(const AOldToken: IToken); overload; 1886 1887 function ToString: String; override; 1888 end; 1889 1890 TClassicToken = class(TANTLRObject, IClassicToken, IToken) 1891 strict private 1892 FText: String; 1893 FTokenType: Integer; 1894 FLine: Integer; 1895 FCharPositionInLine: Integer; 1896 FChannel: Integer; 1897 1898 /// <summary>What token number is this from 0..n-1 tokens </summary> 1899 FIndex: Integer; 1900 protected 1901 { IClassicToken } 1902 function GetTokenType: Integer; virtual; 1903 procedure SetTokenType(const Value: Integer); virtual; 1904 function GetLine: Integer; virtual; 1905 procedure SetLine(const Value: Integer); virtual; 1906 function GetCharPositionInLine: Integer; virtual; 1907 procedure SetCharPositionInLine(const Value: Integer); virtual; 1908 function GetChannel: Integer; virtual; 1909 procedure SetChannel(const Value: Integer); virtual; 1910 function GetTokenIndex: Integer; virtual; 1911 procedure SetTokenIndex(const Value: Integer); virtual; 1912 function GetText: String; virtual; 1913 procedure SetText(const Value: String); virtual; 1914 function GetInputStream: ICharStream; virtual; 1915 procedure SetInputStream(const Value: ICharStream); virtual; 1916 public 1917 constructor Create(const ATokenType: Integer); overload; 1918 constructor Create(const AOldToken: IToken); overload; 1919 constructor Create(const ATokenType: Integer; const AText: String); overload; 1920 constructor Create(const ATokenType: Integer; const AText: String; 1921 const AChannel: Integer); overload; 1922 1923 function ToString: String; override; 1924 end; 1925 1926 TToken = class sealed 1927 public 1928 const 1929 EOR_TOKEN_TYPE = 1; 1930 1931 /// <summary>imaginary tree navigation type; traverse "get child" link </summary> 1932 DOWN = 2; 1933 1934 /// <summary>imaginary tree navigation 
type; finish with a child list </summary> 1935 UP = 3; 1936 1937 MIN_TOKEN_TYPE = UP + 1; 1938 EOF = Integer(cscEOF); 1939 INVALID_TOKEN_TYPE = 0; 1940 1941 /// <summary> 1942 /// All tokens go to the parser (unless skip() is called in that rule) 1943 /// on a particular "channel". The parser tunes to a particular channel 1944 /// so that whitespace etc... can go to the parser on a "hidden" channel. 1945 /// </summary> 1946 DEFAULT_CHANNEL = 0; 1947 1948 /// <summary> 1949 /// Anything on different channel than DEFAULT_CHANNEL is not parsed by parser. 1950 /// </summary> 1951 HIDDEN_CHANNEL = 99; 1952 public 1953 class var 1954 EOF_TOKEN: IToken; 1955 INVALID_TOKEN: IToken; 1956 /// <summary> 1957 /// In an action, a lexer rule can set token to this SKIP_TOKEN and ANTLR 1958 /// will avoid creating a token for this symbol and try to fetch another. 1959 /// </summary> 1960 SKIP_TOKEN: IToken; 1961 private 1962 class procedure Initialize; static; 1963 end; 1964 1965 /// <summary> 1966 /// Global constants 1967 /// </summary> 1968 TConstants = class sealed 1969 public 1970 const 1971 VERSION = '3.1b1'; 1972 1973 // Moved to version 2 for v3.1: added grammar name to enter/exit Rule 1974 DEBUG_PROTOCOL_VERSION = '2'; 1975 1976 ANTLRWORKS_DIR = 'antlrworks'; 1977 end; 1978 1979 TBaseRecognizer = class abstract(TANTLRObject, IBaseRecognizer) 1980 public 1981 const 1982 MEMO_RULE_FAILED = -2; 1983 MEMO_RULE_UNKNOWN = -1; 1984 INITIAL_FOLLOW_STACK_SIZE = 100; 1985 NEXT_TOKEN_RULE_NAME = 'nextToken'; 1986 // copies from Token object for convenience in actions 1987 DEFAULT_TOKEN_CHANNEL = TToken.DEFAULT_CHANNEL; 1988 HIDDEN = TToken.HIDDEN_CHANNEL; 1989 strict protected 1990 /// <summary> 1991 /// An externalized representation of the - shareable - internal state of 1992 /// this lexer, parser or tree parser. 1993 /// </summary> 1994 /// <remarks> 1995 /// The state of a lexer, parser, or tree parser are collected into 1996 /// external state objects so that the state can be shared. This sharing 1997 /// is needed to have one grammar import others and share same error 1998 /// variables and other state variables. It's a kind of explicit multiple 1999 /// inheritance via delegation of methods and shared state. 2000 /// </remarks> 2001 FState: IRecognizerSharedState; 2002 2003 property State: IRecognizerSharedState read FState; 2004 strict protected 2005 /// <summary> 2006 /// Match needs to return the current input symbol, which gets put 2007 /// into the label for the associated token ref; e.g., x=ID. Token 2008 /// and tree parsers need to return different objects. Rather than test 2009 /// for input stream type or change the IntStream interface, I use 2010 /// a simple method to ask the recognizer to tell me what the current 2011 /// input symbol is. 2012 /// </summary> 2013 /// <remarks>This is ignored for lexers.</remarks> 2014 function GetCurrentInputSymbol(const Input: IIntStream): IANTLRInterface; virtual; 2015 2016 /// <summary> 2017 /// Factor out what to do upon token mismatch so tree parsers can behave 2018 /// differently. Override and call MismatchRecover(input, ttype, follow) 2019 /// to get single token insertion and deletion. Use this to turn off 2020 /// single token insertion and deletion. Override mismatchRecover 2021 /// to call this instead. 2022 /// </summary> 2023 procedure Mismatch(const Input: IIntStream; const TokenType: Integer; 2024 const Follow: IBitSet); virtual; 2025 2026 /// <summary> 2027 /// Attempt to Recover from a single missing or extra token. 
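///
/// A hedged sketch of turning single-token recovery off, in the spirit of
/// the Mismatch comments above: override this method in a generated parser
/// subclass (TMyParser is illustrative, not part of this unit) so that it
/// just raises instead of attempting insertion or deletion:
///
///   function TMyParser.RecoverFromMismatchedToken(const Input: IIntStream;
///     const TokenType: Integer; const Follow: IBitSet): IANTLRInterface;
///   begin
///     Result := nil;
///     Mismatch(Input, TokenType, Follow); // raises rather than recovering
///   end;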
2028 /// </summary> 2029 /// <remarks> 2030 /// EXTRA TOKEN 2031 /// 2032 /// LA(1) is not what we are looking for. If LA(2) has the right token, 2033 /// however, then assume LA(1) is some extra spurious token. Delete it 2034 /// and LA(2) as if we were doing a normal Match(), which advances the 2035 /// input. 2036 /// 2037 /// MISSING TOKEN 2038 /// 2039 /// If current token is consistent with what could come after 2040 /// ttype then it is ok to "insert" the missing token, else throw 2041 /// exception For example, Input "i=(3;" is clearly missing the 2042 /// ')'. When the parser returns from the nested call to expr, it 2043 /// will have call chain: 2044 /// 2045 /// stat -> expr -> atom 2046 /// 2047 /// and it will be trying to Match the ')' at this point in the 2048 /// derivation: 2049 /// 2050 /// => ID '=' '(' INT ')' ('+' atom)* ';' 2051 /// ^ 2052 /// Match() will see that ';' doesn't Match ')' and report a 2053 /// mismatched token error. To Recover, it sees that LA(1)==';' 2054 /// is in the set of tokens that can follow the ')' token 2055 /// reference in rule atom. It can assume that you forgot the ')'. 2056 /// </remarks> 2057 function RecoverFromMismatchedToken(const Input: IIntStream; 2058 const TokenType: Integer; const Follow: IBitSet): IANTLRInterface; virtual; 2059 2060 /// <summary> 2061 /// Conjure up a missing token during error recovery. 2062 /// </summary> 2063 /// <remarks> 2064 /// The recognizer attempts to recover from single missing 2065 /// symbols. But, actions might refer to that missing symbol. 2066 /// For example, x=ID {f($x);}. The action clearly assumes 2067 /// that there has been an identifier matched previously and that 2068 /// $x points at that token. If that token is missing, but 2069 /// the next token in the stream is what we want we assume that 2070 /// this token is missing and we keep going. Because we 2071 /// have to return some token to replace the missing token, 2072 /// we have to conjure one up. This method gives the user control 2073 /// over the tokens returned for missing tokens. Mostly, 2074 /// you will want to create something special for identifier 2075 /// tokens. For literals such as '{' and ',', the default 2076 /// action in the parser or tree parser works. It simply creates 2077 /// a CommonToken of the appropriate type. The text will be the token. 2078 /// If you change what tokens must be created by the lexer, 2079 /// override this method to create the appropriate tokens. 2080 /// </remarks> 2081 function GetMissingSymbol(const Input: IIntStream; 2082 const E: ERecognitionException; const ExpectedTokenType: Integer; 2083 const Follow: IBitSet): IANTLRInterface; virtual; 2084 2085 /// <summary> 2086 /// Push a rule's follow set using our own hardcoded stack 2087 /// </summary> 2088 /// <param name="fset"></param> 2089 procedure PushFollow(const FSet: IBitSet); 2090 2091 /// <summary>Compute the context-sensitive FOLLOW set for current rule. 2092 /// This is set of token types that can follow a specific rule 2093 /// reference given a specific call chain. You get the set of 2094 /// viable tokens that can possibly come next (lookahead depth 1) 2095 /// given the current call chain. Contrast this with the 2096 /// definition of plain FOLLOW for rule r: 2097 /// 2098 /// FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)} 2099 /// 2100 /// where x in T* and alpha, beta in V*; T is set of terminals and 2101 /// V is the set of terminals and nonterminals. 
In other words,
2102 /// FOLLOW(r) is the set of all tokens that can possibly follow
2103 /// references to r in *any* sentential form (context). At
2104 /// runtime, however, we know precisely which context applies as
2105 /// we have the call chain. We may compute the exact (rather
2106 /// than covering superset) set of following tokens.
2107 ///
2108 /// For example, consider the grammar:
2109 ///
2110 /// stat : ID '=' expr ';' // FOLLOW(stat)=={EOF}
2111 ///      | "return" expr '.'
2112 ///      ;
2113 /// expr : atom ('+' atom)* ; // FOLLOW(expr)=={';','.',')'}
2114 /// atom : INT // FOLLOW(atom)=={'+',')',';','.'}
2115 ///      | '(' expr ')'
2116 ///      ;
2117 ///
2118 /// The FOLLOW sets are all inclusive whereas context-sensitive
2119 /// FOLLOW sets are precisely what could follow a rule reference.
2120 /// For the input "i=(3);", here is the derivation:
2121 ///
2122 /// stat => ID '=' expr ';'
2123 ///      => ID '=' atom ('+' atom)* ';'
2124 ///      => ID '=' '(' expr ')' ('+' atom)* ';'
2125 ///      => ID '=' '(' atom ')' ('+' atom)* ';'
2126 ///      => ID '=' '(' INT ')' ('+' atom)* ';'
2127 ///      => ID '=' '(' INT ')' ';'
2128 ///
2129 /// At the "3" token, you'd have a call chain of
2130 ///
2131 /// stat -> expr -> atom -> expr -> atom
2132 ///
2133 /// What can follow that specific nested ref to atom? Exactly ')',
2134 /// as you can see by looking at the derivation of this specific
2135 /// input. Contrast this with FOLLOW(atom)={'+',')',';','.'}.
2136 ///
2137 /// You want the exact viable token set when recovering from a
2138 /// token mismatch. Upon token mismatch, if LA(1) is a member of
2139 /// the viable next token set, then you know there is most likely
2140 /// a missing token in the input stream. "Insert" one by just not
2141 /// throwing an exception.
2142 /// </summary>
2143 function ComputeContextSensitiveRuleFOLLOW: IBitSet; virtual;
2144
2145 (* Compute the error recovery set for the current rule. During
2146  * rule invocation, the parser pushes the set of tokens that can
2147  * follow that rule reference on the stack; this amounts to
2148  * computing FIRST of what follows the rule reference in the
2149  * enclosing rule. This local follow set only includes tokens
2150  * from within the rule; i.e., the FIRST computation done by
2151  * ANTLR stops at the end of a rule.
2152  *
2153  * EXAMPLE
2154  *
2155  * When you find a "no viable alt exception", the input is not
2156  * consistent with any of the alternatives for rule r. The best
2157  * thing to do is to consume tokens until you see something that
2158  * can legally follow a call to r *or* any rule that called r.
2159  * You don't want the exact set of viable next tokens because the
2160  * input might just be missing a token--you might consume the
2161  * rest of the input looking for one of the missing tokens.
2162  *
2163  * Consider the grammar:
2164  *
2165  * a : '[' b ']'
2166  *   | '(' b ')'
2167  *   ;
2168  * b : c '^' INT ;
2169  * c : ID
2170  *   | INT
2171  *   ;
2172  *
2173  * At each rule invocation, the set of tokens that could follow
2174  * that rule is pushed on a stack. Here are the various "local"
2175  * follow sets:
2176  *
2177  * FOLLOW(b1_in_a) = FIRST(']') = ']'
2178  * FOLLOW(b2_in_a) = FIRST(')') = ')'
2179  * FOLLOW(c_in_b) = FIRST('^') = '^'
2180  *
2181  * Upon erroneous input "[]", the call chain is
2182  *
2183  * a -> b -> c
2184  *
2185  * and, hence, the follow context stack is:
2186  *
2187  * depth  local follow set   after call to rule
2188  *   0        <EOF>              a (from main())
2189  *   1         ']'               b
2190  *   2         '^'               c
2191  *
2192  * Notice that ')' is not included, because b would have to have
2193  * been called from a different context in rule a for ')' to be
2194  * included.
2195  *
2196  * For error recovery, we cannot consider FOLLOW(c)
2197  * (context-sensitive or otherwise). We need the combined set of
2198  * all context-sensitive FOLLOW sets--the set of all tokens that
2199  * could follow any reference in the call chain. We need to
2200  * resync to one of those tokens. Note that FOLLOW(c)='^' and if
2201  * we resync'd to that token, we'd consume until EOF. We need to
2202  * sync to the context-sensitive FOLLOWs for a, b, and c: {']','^'}.
2203  * In this case, for input "[]", LA(1) is in this set so we would
2204  * not consume anything, and after printing an error rule c would
2205  * return normally. It would not find the required '^' though.
2206  * At this point, it gets a mismatched token error and throws an
2207  * exception (since LA(1) is not in the viable following token
2208  * set). The rule exception handler tries to Recover, but finds
2209  * the same recovery set and doesn't consume anything. Rule b
2210  * exits normally returning to rule a. Now it finds the ']' (and
2211  * with the successful Match exits errorRecovery mode).
2212  *
2213  * So, you can see that the parser walks up the call chain looking
2214  * for the token that was a member of the recovery set.
2215  *
2216  * Errors are not generated in errorRecovery mode.
2217  *
2218  * ANTLR's error recovery mechanism is based upon original ideas:
2219  *
2220  * "Algorithms + Data Structures = Programs" by Niklaus Wirth
2221  *
2222  * and
2223  *
2224  * "A note on error recovery in recursive descent parsers":
2225  * http://portal.acm.org/citation.cfm?id=947902.947905
2226  *
2227  * Later, Josef Grosch had some good ideas:
2228  *
2229  * "Efficient and Comfortable Error Recovery in Recursive Descent
2230  * Parsers":
2231  * ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
2232  *
2233  * Like Grosch I implemented local FOLLOW sets that are combined
2234  * at run-time upon error to avoid overhead during parsing.
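 *
 * A hedged, conceptual sketch of that combination step (the real work is
 * done by CombineFollows below; Result and I are illustrative, and this
 * assumes the shared state exposes the follow stack as Following and
 * FollowingStackPointer):
 *
 *   Result := TBitSet.Create;
 *   for I := State.FollowingStackPointer downto 0 do
 *     Result.OrInPlace(State.Following[I]);
 *
 * Recover() then uses a set like this with ConsumeUntil() to resynchronize.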
2235 *) 2236 function ComputeErrorRecoverySet: IBitSet; virtual; 2237 2238 function CombineFollows(const Exact: Boolean): IBitSet; 2239 protected 2240 { IBaseRecognizer } 2241 function GetInput: IIntStream; virtual; abstract; 2242 function GetBacktrackingLevel: Integer; 2243 function GetState: IRecognizerSharedState; 2244 function GetNumberOfSyntaxErrors: Integer; 2245 function GetGrammarFileName: String; virtual; 2246 function GetSourceName: String; virtual; abstract; 2247 function GetTokenNames: TStringArray; virtual; 2248 2249 procedure BeginBacktrack(const Level: Integer); virtual; 2250 procedure EndBacktrack(const Level: Integer; const Successful: Boolean); virtual; 2251 procedure Reset; virtual; 2252 function Match(const Input: IIntStream; const TokenType: Integer; 2253 const Follow: IBitSet): IANTLRInterface; virtual; 2254 function MismatchIsUnwantedToken(const Input: IIntStream; 2255 const TokenType: Integer): Boolean; 2256 function MismatchIsMissingToken(const Input: IIntStream; 2257 const Follow: IBitSet): Boolean; 2258 procedure BeginResync; virtual; 2259 procedure EndResync; virtual; 2260 procedure ReportError(const E: ERecognitionException); virtual; 2261 procedure MatchAny(const Input: IIntStream); virtual; 2262 procedure DisplayRecognitionError(const TokenNames: TStringArray; 2263 const E: ERecognitionException); virtual; 2264 function GetErrorMessage(const E: ERecognitionException; 2265 const TokenNames: TStringArray): String; virtual; 2266 function GetErrorHeader(const E: ERecognitionException): String; virtual; 2267 function GetTokenErrorDisplay(const T: IToken): String; virtual; 2268 procedure EmitErrorMessage(const Msg: String); virtual; 2269 procedure Recover(const Input: IIntStream; const RE: ERecognitionException); virtual; 2270 function RecoverFromMismatchedSet(const Input: IIntStream; 2271 const E: ERecognitionException; const Follow: IBitSet): IANTLRInterface; virtual; 2272 procedure ConsumeUntil(const Input: IIntStream; const TokenType: Integer); overload; virtual; 2273 procedure ConsumeUntil(const Input: IIntStream; const BitSet: IBitSet); overload; virtual; 2274 //function GetRuleInvocationStack: IList<IANTLRInterface>; overload; virtual; 2275 //function GetRuleInvocationStack(const E: Exception; 2276 // const RecognizerClassName: String): IList<IANTLRInterface>; overload; 2277 function ToStrings(const Tokens: IList<IToken>): IList<String>; virtual; 2278 function GetRuleMemoization(const RuleIndex, RuleStartIndex: Integer): Integer; virtual; 2279 function AlreadyParsedRule(const Input: IIntStream; 2280 const RuleIndex: Integer): Boolean; virtual; 2281 procedure Memoize(const Input: IIntStream; const RuleIndex, 2282 RuleStartIndex: Integer); virtual; 2283 function GetRuleMemoizationChaceSize: Integer; 2284 2285 procedure TraceIn(const RuleName: String; const RuleIndex: Integer; 2286 const InputSymbol: String); virtual; 2287 procedure TraceOut(const RuleName: String; const RuleIndex: Integer; 2288 const InputSymbol: String); virtual; 2289 2290 property Input: IIntStream read GetInput; 2291 public 2292 constructor Create; overload; 2293 constructor Create(const AState: IRecognizerSharedState); overload; 2294 end; 2295 2296 TCommonTokenStream = class(TANTLRObject, ICommonTokenStream, ITokenStream) 2297 strict private 2298 FTokenSource: ITokenSource; 2299 2300 /// <summary>Record every single token pulled from the source so we can reproduce 2301 /// chunks of it later. 
2302 /// </summary> 2303 FTokens: IList<IToken>; 2304 2305 /// <summary><![CDATA[Map<tokentype, channel>]]> to override some Tokens' channel numbers </summary> 2306 FChannelOverrideMap: IDictionary<Integer, Integer>; 2307 2308 /// <summary><![CDATA[Set<tokentype>;]]> discard any tokens with this type </summary> 2309 FDiscardSet: IHashList<Integer, Integer>; 2310 2311 /// <summary>Skip tokens on any channel but this one; this is how we skip whitespace... </summary> 2312 FChannel: Integer; 2313 2314 /// <summary>By default, track all incoming tokens </summary> 2315 FDiscardOffChannelTokens: Boolean; 2316 2317 /// <summary>Track the last Mark() call result value for use in Rewind().</summary> 2318 FLastMarker: Integer; 2319 2320 /// <summary> 2321 /// The index into the tokens list of the current token (next token 2322 /// to consume). p==-1 indicates that the tokens list is empty 2323 /// </summary> 2324 FP: Integer; 2325 strict protected 2326 /// <summary>Load all tokens from the token source and put in tokens. 2327 /// This is done upon first LT request because you might want to 2328 /// set some token type / channel overrides before filling buffer. 2329 /// </summary> 2330 procedure FillBuffer; virtual; 2331 2332 /// <summary>Look backwards k tokens on-channel tokens </summary> 2333 function LB(const K: Integer): IToken; virtual; 2334 2335 /// <summary>Given a starting index, return the index of the first on-channel 2336 /// token. 2337 /// </summary> 2338 function SkipOffTokenChannels(const I: Integer): Integer; virtual; 2339 function SkipOffTokenChannelsReverse(const I: Integer): Integer; virtual; 2340 protected 2341 { IIntStream } 2342 function GetSourceName: String; virtual; 2343 2344 procedure Consume; virtual; 2345 function LA(I: Integer): Integer; virtual; 2346 function LAChar(I: Integer): Char; 2347 function Mark: Integer; virtual; 2348 function Index: Integer; virtual; 2349 procedure Rewind(const Marker: Integer); overload; virtual; 2350 procedure Rewind; overload; virtual; 2351 procedure Release(const Marker: Integer); virtual; 2352 procedure Seek(const Index: Integer); virtual; 2353 function Size: Integer; virtual; 2354 protected 2355 { ITokenStream } 2356 function GetTokenSource: ITokenSource; virtual; 2357 procedure SetTokenSource(const Value: ITokenSource); virtual; 2358 2359 function LT(const K: Integer): IToken; virtual; 2360 function Get(const I: Integer): IToken; virtual; 2361 function ToString(const Start, Stop: Integer): String; reintroduce; overload; virtual; 2362 function ToString(const Start, Stop: IToken): String; reintroduce; overload; virtual; 2363 protected 2364 { ICommonTokenStream } 2365 procedure SetTokenTypeChannel(const TType, Channel: Integer); 2366 procedure DiscardTokenType(const TType: Integer); 2367 procedure DiscardOffChannelTokens(const Discard: Boolean); 2368 function GetTokens: IList<IToken>; overload; 2369 function GetTokens(const Start, Stop: Integer): IList<IToken>; overload; 2370 function GetTokens(const Start, Stop: Integer; 2371 const Types: IBitSet): IList<IToken>; overload; 2372 function GetTokens(const Start, Stop: Integer; 2373 const Types: IList<Integer>): IList<IToken>; overload; 2374 function GetTokens(const Start, Stop, 2375 TokenType: Integer): IList<IToken>; overload; 2376 procedure Reset; virtual; 2377 public 2378 constructor Create; overload; 2379 constructor Create(const ATokenSource: ITokenSource); overload; 2380 constructor Create(const ATokenSource: ITokenSource; 2381 const AChannel: Integer); overload; 2382 constructor 
Create(const ALexer: ILexer); overload; 2383 constructor Create(const ALexer: ILexer; 2384 const AChannel: Integer); overload; 2385 2386 function ToString: String; overload; override; 2387 end; 2388 2389 TDFA = class abstract(TANTLRObject, IDFA) 2390 strict private 2391 FSpecialStateTransitionHandler: TSpecialStateTransitionHandler; 2392 FEOT: TSmallintArray; 2393 FEOF: TSmallintArray; 2394 FMin: TCharArray; 2395 FMax: TCharArray; 2396 FAccept: TSmallintArray; 2397 FSpecial: TSmallintArray; 2398 FTransition: TSmallintMatrix; 2399 FDecisionNumber: Integer; 2400 FRecognizer: Pointer; { IBaseRecognizer } 2401 function GetRecognizer: IBaseRecognizer; 2402 procedure SetRecognizer(const Value: IBaseRecognizer); 2403 strict protected 2404 procedure NoViableAlt(const S: Integer; const Input: IIntStream); 2405 2406 property Recognizer: IBaseRecognizer read GetRecognizer write SetRecognizer; 2407 property DecisionNumber: Integer read FDecisionNumber write FDecisionNumber; 2408 property EOT: TSmallintArray read FEOT write FEOT; 2409 property EOF: TSmallintArray read FEOF write FEOF; 2410 property Min: TCharArray read FMin write FMin; 2411 property Max: TCharArray read FMax write FMax; 2412 property Accept: TSmallintArray read FAccept write FAccept; 2413 property Special: TSmallintArray read FSpecial write FSpecial; 2414 property Transition: TSmallintMatrix read FTransition write FTransition; 2415 protected 2416 { IDFA } 2417 function GetSpecialStateTransitionHandler: TSpecialStateTransitionHandler; 2418 procedure SetSpecialStateTransitionHandler(const Value: TSpecialStateTransitionHandler); 2419 2420 function Predict(const Input: IIntStream): Integer; 2421 procedure Error(const NVAE: ENoViableAltException); virtual; 2422 function SpecialStateTransition(const S: Integer; 2423 const Input: IIntStream): Integer; virtual; 2424 function Description: String; virtual; 2425 function SpecialTransition(const State, Symbol: Integer): Integer; 2426 public 2427 class function UnpackEncodedString(const EncodedString: String): TSmallintArray; static; 2428 class function UnpackEncodedStringArray(const EncodedStrings: TStringArray): TSmallintMatrix; overload; static; 2429 class function UnpackEncodedStringArray(const EncodedStrings: array of String): TSmallintMatrix; overload; static; 2430 class function UnpackEncodedStringToUnsignedChars(const EncodedString: String): TCharArray; static; 2431 end; 2432 2433 TLexer = class abstract(TBaseRecognizer, ILexer, ITokenSource) 2434 strict private 2435 const 2436 TOKEN_dot_EOF = Ord(cscEOF); 2437 strict private 2438 /// <summary>Where is the lexer drawing characters from? 
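///
/// Typically an ICharStream such as TANTLRStringStream or TANTLRFileStream.
/// A hedged wiring sketch (TMyLexer is an illustrative generated lexer, not
/// part of this unit):
///
///   Lexer := TMyLexer.Create(TANTLRStringStream.Create('x = 1;'));
///   Tokens := TCommonTokenStream.Create(Lexer);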
</summary> 2439 FInput: ICharStream; 2440 protected 2441 { IBaseRecognizer } 2442 function GetSourceName: String; override; 2443 function GetInput: IIntStream; override; 2444 procedure Reset; override; 2445 procedure ReportError(const E: ERecognitionException); override; 2446 function GetErrorMessage(const E: ERecognitionException; 2447 const TokenNames: TStringArray): String; override; 2448 protected 2449 { ILexer } 2450 function GetCharStream: ICharStream; virtual; 2451 procedure SetCharStream(const Value: ICharStream); virtual; 2452 function GetLine: Integer; virtual; 2453 function GetCharPositionInLine: Integer; virtual; 2454 function GetCharIndex: Integer; virtual; 2455 function GetText: String; virtual; 2456 procedure SetText(const Value: String); virtual; 2457 2458 function NextToken: IToken; virtual; 2459 procedure Skip; 2460 procedure DoTokens; virtual; abstract; 2461 procedure Emit(const Token: IToken); overload; virtual; 2462 function Emit: IToken; overload; virtual; 2463 procedure Match(const S: String); reintroduce; overload; virtual; 2464 procedure Match(const C: Integer); reintroduce; overload; virtual; 2465 procedure MatchAny; reintroduce; overload; virtual; 2466 procedure MatchRange(const A, B: Integer); virtual; 2467 procedure Recover(const RE: ERecognitionException); reintroduce; overload; virtual; 2468 function GetCharErrorDisplay(const C: Integer): String; 2469 procedure TraceIn(const RuleName: String; const RuleIndex: Integer); reintroduce; overload; virtual; 2470 procedure TraceOut(const RuleName: String; const RuleIndex: Integer); reintroduce; overload; virtual; 2471 strict protected 2472 property Input: ICharStream read FInput; 2473 property CharIndex: Integer read GetCharIndex; 2474 property Text: String read GetText write SetText; 2475 public 2476 constructor Create; overload; 2477 constructor Create(const AInput: ICharStream); overload; 2478 constructor Create(const AInput: ICharStream; 2479 const AState: IRecognizerSharedState); overload; 2480 end; 2481 2482 TParser = class(TBaseRecognizer, IParser) 2483 strict private 2484 FInput: ITokenStream; 2485 protected 2486 property Input: ITokenStream read FInput; 2487 protected 2488 { IBaseRecognizer } 2489 procedure Reset; override; 2490 function GetCurrentInputSymbol(const Input: IIntStream): IANTLRInterface; override; 2491 function GetMissingSymbol(const Input: IIntStream; 2492 const E: ERecognitionException; const ExpectedTokenType: Integer; 2493 const Follow: IBitSet): IANTLRInterface; override; 2494 function GetSourceName: String; override; 2495 function GetInput: IIntStream; override; 2496 protected 2497 { IParser } 2498 function GetTokenStream: ITokenStream; virtual; 2499 procedure SetTokenStream(const Value: ITokenStream); virtual; 2500 2501 procedure TraceIn(const RuleName: String; const RuleIndex: Integer); reintroduce; overload; 2502 procedure TraceOut(const RuleName: String; const RuleIndex: Integer); reintroduce; overload; 2503 public 2504 constructor Create(const AInput: ITokenStream); overload; 2505 constructor Create(const AInput: ITokenStream; 2506 const AState: IRecognizerSharedState); overload; 2507 end; 2508 2509 TRuleReturnScope = class(TANTLRObject, IRuleReturnScope) 2510 protected 2511 { IRuleReturnScope } 2512 function GetStart: IANTLRInterface; virtual; 2513 procedure SetStart(const Value: IANTLRInterface); virtual; 2514 function GetStop: IANTLRInterface; virtual; 2515 procedure SetStop(const Value: IANTLRInterface); virtual; 2516 function GetTree: IANTLRInterface; virtual; 2517 procedure 
SetTree(const Value: IANTLRInterface); virtual; 2518 function GetTemplate: IANTLRInterface; virtual; 2519 end; 2520 2521 TParserRuleReturnScope = class(TRuleReturnScope, IParserRuleReturnScope) 2522 strict private 2523 FStart: IToken; 2524 FStop: IToken; 2525 protected 2526 { IRuleReturnScope } 2527 function GetStart: IANTLRInterface; override; 2528 procedure SetStart(const Value: IANTLRInterface); override; 2529 function GetStop: IANTLRInterface; override; 2530 procedure SetStop(const Value: IANTLRInterface); override; 2531 end; 2532 2533 TTokenRewriteStream = class(TCommonTokenStream, ITokenRewriteStream) 2534 public 2535 const 2536 DEFAULT_PROGRAM_NAME = 'default'; 2537 PROGRAM_INIT_SIZE = 100; 2538 MIN_TOKEN_INDEX = 0; 2539 strict protected 2540 // Define the rewrite operation hierarchy 2541 type 2542 IRewriteOperation = interface(IANTLRInterface) 2543 ['{285A54ED-58FF-44B1-A268-2686476D4419}'] 2544 { Property accessors } 2545 function GetInstructionIndex: Integer; 2546 procedure SetInstructionIndex(const Value: Integer); 2547 function GetIndex: Integer; 2548 procedure SetIndex(const Value: Integer); 2549 function GetText: IANTLRInterface; 2550 procedure SetText(const Value: IANTLRInterface); 2551 function GetParent: ITokenRewriteStream; 2552 procedure SetParent(const Value: ITokenRewriteStream); 2553 2554 { Methods } 2555 2556 /// <summary>Execute the rewrite operation by possibly adding to the buffer. 2557 /// Return the index of the next token to operate on. 2558 /// </summary> 2559 function Execute(const Buf: TStringBuilder): Integer; 2560 2561 { Properties } 2562 property InstructionIndex: Integer read GetInstructionIndex write SetInstructionIndex; 2563 property Index: Integer read GetIndex write SetIndex; 2564 property Text: IANTLRInterface read GetText write SetText; 2565 property Parent: ITokenRewriteStream read GetParent write SetParent; 2566 end; 2567 2568 TRewriteOperation = class(TANTLRObject, IRewriteOperation) 2569 strict private 2570 // What index into rewrites List are we? 2571 FInstructionIndex: Integer; 2572 // Token buffer index 2573 FIndex: Integer; 2574 FText: IANTLRInterface; 2575 FParent: Pointer; {ITokenRewriteStream;} 2576 protected 2577 { IRewriteOperation } 2578 function GetInstructionIndex: Integer; 2579 procedure SetInstructionIndex(const Value: Integer); 2580 function GetIndex: Integer; 2581 procedure SetIndex(const Value: Integer); 2582 function GetText: IANTLRInterface; 2583 procedure SetText(const Value: IANTLRInterface); 2584 function GetParent: ITokenRewriteStream; 2585 procedure SetParent(const Value: ITokenRewriteStream); 2586 2587 function Execute(const Buf: TStringBuilder): Integer; virtual; 2588 protected 2589 constructor Create(const AIndex: Integer; const AText: IANTLRInterface; 2590 const AParent: ITokenRewriteStream); 2591 2592 property Index: Integer read FIndex write FIndex; 2593 property Text: IANTLRInterface read FText write FText; 2594 property Parent: ITokenRewriteStream read GetParent write SetParent; 2595 public 2596 function ToString: String; override; 2597 end; 2598 2599 IInsertBeforeOp = interface(IRewriteOperation) 2600 ['{BFB732E2-BE6A-4691-AE3B-5C8013DE924E}'] 2601 end; 2602 2603 TInsertBeforeOp = class(TRewriteOperation, IInsertBeforeOp) 2604 protected 2605 { IRewriteOperation } 2606 function Execute(const Buf: TStringBuilder): Integer; override; 2607 end; 2608 2609 /// <summary>I'm going to try replacing range from x..y with (y-x)+1 ReplaceOp 2610 /// instructions. 
2611 /// </summary> 2612 IReplaceOp = interface(IRewriteOperation) 2613 ['{630C434A-99EA-4589-A65D-64A7B3DAC407}'] 2614 { Property accessors } 2615 function GetLastIndex: Integer; 2616 procedure SetLastIndex(const Value: Integer); 2617 2618 { Properties } 2619 property LastIndex: Integer read GetLastIndex write SetLastIndex; 2620 end; 2621 2622 TReplaceOp = class(TRewriteOperation, IReplaceOp) 2623 private 2624 FLastIndex: Integer; 2625 protected 2626 { IRewriteOperation } 2627 function Execute(const Buf: TStringBuilder): Integer; override; 2628 protected 2629 { IReplaceOp } 2630 function GetLastIndex: Integer; 2631 procedure SetLastIndex(const Value: Integer); 2632 public 2633 constructor Create(const AStart, AStop: Integer; 2634 const AText: IANTLRInterface; const AParent: ITokenRewriteStream); 2635 2636 function ToString: String; override; 2637 end; 2638 2639 IDeleteOp = interface(IRewriteOperation) 2640 ['{C39345BC-F170-4C3A-A989-65E6B9F0712B}'] 2641 end; 2642 2643 TDeleteOp = class(TReplaceOp) 2644 public 2645 function ToString: String; override; 2646 end; 2647 strict private 2648 type 2649 TRewriteOpComparer<T: IRewriteOperation> = class(TComparer<T>) 2650 public 2651 function Compare(const Left, Right: T): Integer; override; 2652 end; 2653 strict private 2654 /// <summary>You may have multiple, named streams of rewrite operations. 2655 /// I'm calling these things "programs." 2656 /// Maps String (name) -> rewrite (IList) 2657 /// </summary> 2658 FPrograms: IDictionary<String, IList<IRewriteOperation>>; 2659 2660 /// <summary>Map String (program name) -> Integer index </summary> 2661 FLastRewriteTokenIndexes: IDictionary<String, Integer>; 2662 strict private 2663 function InitializeProgram(const Name: String): IList<IRewriteOperation>; 2664 protected 2665 { ITokenRewriteStream } 2666 procedure Rollback(const InstructionIndex: Integer); overload; virtual; 2667 procedure Rollback(const ProgramName: String; 2668 const InstructionIndex: Integer); overload; virtual; 2669 2670 procedure DeleteProgram; overload; virtual; 2671 procedure DeleteProgram(const ProgramName: String); overload; virtual; 2672 2673 procedure InsertAfter(const T: IToken; const Text: IANTLRInterface); overload; virtual; 2674 procedure InsertAfter(const Index: Integer; const Text: IANTLRInterface); overload; virtual; 2675 procedure InsertAfter(const ProgramName: String; const T: IToken; 2676 const Text: IANTLRInterface); overload; virtual; 2677 procedure InsertAfter(const ProgramName: String; const Index: Integer; 2678 const Text: IANTLRInterface); overload; virtual; 2679 procedure InsertAfter(const T: IToken; const Text: String); overload; 2680 procedure InsertAfter(const Index: Integer; const Text: String); overload; 2681 procedure InsertAfter(const ProgramName: String; const T: IToken; 2682 const Text: String); overload; 2683 procedure InsertAfter(const ProgramName: String; const Index: Integer; 2684 const Text: String); overload; 2685 2686 procedure InsertBefore(const T: IToken; const Text: IANTLRInterface); overload; virtual; 2687 procedure InsertBefore(const Index: Integer; const Text: IANTLRInterface); overload; virtual; 2688 procedure InsertBefore(const ProgramName: String; const T: IToken; 2689 const Text: IANTLRInterface); overload; virtual; 2690 procedure InsertBefore(const ProgramName: String; const Index: Integer; 2691 const Text: IANTLRInterface); overload; virtual; 2692 procedure InsertBefore(const T: IToken; const Text: String); overload; 2693 procedure InsertBefore(const Index: Integer; const Text: 
String); overload;
2694 procedure InsertBefore(const ProgramName: String; const T: IToken;
2695 const Text: String); overload;
2696 procedure InsertBefore(const ProgramName: String; const Index: Integer;
2697 const Text: String); overload;
2698
2699 procedure Replace(const Index: Integer; const Text: IANTLRInterface); overload; virtual;
2700 procedure Replace(const Start, Stop: Integer; const Text: IANTLRInterface); overload; virtual;
2701 procedure Replace(const IndexT: IToken; const Text: IANTLRInterface); overload; virtual;
2702 procedure Replace(const Start, Stop: IToken; const Text: IANTLRInterface); overload; virtual;
2703 procedure Replace(const ProgramName: String; const Start, Stop: Integer;
2704 const Text: IANTLRInterface); overload; virtual;
2705 procedure Replace(const ProgramName: String; const Start, Stop: IToken;
2706 const Text: IANTLRInterface); overload; virtual;
2707 procedure Replace(const Index: Integer; const Text: String); overload;
2708 procedure Replace(const Start, Stop: Integer; const Text: String); overload;
2709 procedure Replace(const IndexT: IToken; const Text: String); overload;
2710 procedure Replace(const Start, Stop: IToken; const Text: String); overload;
2711 procedure Replace(const ProgramName: String; const Start, Stop: Integer;
2712 const Text: String); overload;
2713 procedure Replace(const ProgramName: String; const Start, Stop: IToken;
2714 const Text: String); overload;
2715
2716 procedure Delete(const Index: Integer); overload; virtual;
2717 procedure Delete(const Start, Stop: Integer); overload; virtual;
2718 procedure Delete(const IndexT: IToken); overload; virtual;
2719 procedure Delete(const Start, Stop: IToken); overload; virtual;
2720 procedure Delete(const ProgramName: String; const Start, Stop: Integer); overload; virtual;
2721 procedure Delete(const ProgramName: String; const Start, Stop: IToken); overload; virtual;
2722
2723 function GetLastRewriteTokenIndex: Integer; overload; virtual;
2724
2725 function ToOriginalString: String; overload; virtual;
2726 function ToOriginalString(const Start, Stop: Integer): String; overload; virtual;
2727
2728 function ToString(const ProgramName: String): String; overload; virtual;
2729 function ToString(const ProgramName: String;
2730 const Start, Stop: Integer): String; overload; virtual;
2731
2732 function ToDebugString: String; overload; virtual;
2733 function ToDebugString(const Start, Stop: Integer): String; overload; virtual;
2734 protected
2735 { ITokenStream }
2736 function ToString(const Start, Stop: Integer): String; overload; override;
2737 strict protected
2738 procedure Init; virtual;
2739 function GetProgram(const Name: String): IList<IRewriteOperation>; virtual;
2740 function GetLastRewriteTokenIndex(const ProgramName: String): Integer; overload; virtual;
2741 procedure SetLastRewriteTokenIndex(const ProgramName: String; const I: Integer); overload; virtual;
2742
2743 /// <summary>
2744 /// Return a map from token index to operation.
2745 /// </summary>
2746 /// <remarks>We need to combine operations and report invalid operations (like
2747 /// overlapping replaces that are not completely nested). Inserts to the
2748 /// same index need to be combined etc...
Here are the cases: 2749 /// 2750 /// I.i.u I.j.v leave alone, nonoverlapping 2751 /// I.i.u I.i.v combine: Iivu 2752 /// 2753 /// R.i-j.u R.x-y.v | i-j in x-y delete first R 2754 /// R.i-j.u R.i-j.v delete first R 2755 /// R.i-j.u R.x-y.v | x-y in i-j ERROR 2756 /// R.i-j.u R.x-y.v | boundaries overlap ERROR 2757 /// 2758 /// I.i.u R.x-y.v | i in x-y delete I 2759 /// I.i.u R.x-y.v | i not in x-y leave alone, nonoverlapping 2760 /// R.x-y.v I.i.u | i in x-y ERROR 2761 /// R.x-y.v I.x.u R.x-y.uv (combine, delete I) 2762 /// R.x-y.v I.i.u | i not in x-y leave alone, nonoverlapping 2763 /// 2764 /// I.i.u = insert u before op @ index i 2765 /// R.x-y.u = replace x-y indexed tokens with u 2766 /// 2767 /// First we need to examine replaces. For any replace op: 2768 /// 2769 /// 1. wipe out any insertions before op within that range. 2770 /// 2. Drop any replace op before that is contained completely within 2771 /// that range. 2772 /// 3. Throw exception upon boundary overlap with any previous replace. 2773 /// 2774 /// Then we can deal with inserts: 2775 /// 2776 /// 1. for any inserts to same index, combine even if not adjacent. 2777 /// 2. for any prior replace with same left boundary, combine this 2778 /// insert with replace and delete this replace. 2779 /// 3. throw exception if index in same range as previous replace 2780 /// 2781 /// Don't actually delete; make op null in list. Easier to walk list. 2782 /// Later we can throw as we add to index -> op map. 2783 /// 2784 /// Note that I.2 R.2-2 will wipe out I.2 even though, technically, the 2785 /// inserted stuff would be before the replace range. But, if you 2786 /// add tokens in front of a method body '{' and then delete the method 2787 /// body, I think the stuff before the '{' you added should disappear too. 
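/// A small worked illustration (hypothetical indexes and texts, not part of the original description above): given the recorded operations R.3-5.foo followed by I.3.bar, the rules below combine them into the single operation R.3-5.barfoo keyed at index 3; given I.4.x followed by R.3-5.y, the insert is dropped because index 4 lies inside 3..5, leaving only R.3-5.y.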
2788 /// </remarks> 2789 function ReduceToSingleOperationPerIndex( 2790 const Rewrites: IList<IRewriteOperation>): IDictionary<Integer, IRewriteOperation>; 2791 2792 function GetKindOfOps(const Rewrites: IList<IRewriteOperation>; 2793 const Kind: TGUID): IList<IRewriteOperation>; overload; 2794 /// <summary> 2795 /// Get all operations before an index of a particular kind 2796 /// </summary> 2797 function GetKindOfOps(const Rewrites: IList<IRewriteOperation>; 2798 const Kind: TGUID; const Before: Integer): IList<IRewriteOperation>; overload; 2799 2800 function CatOpText(const A, B: IANTLRInterface): IANTLRInterface; 2801 public 2802 constructor Create; overload; 2803 constructor Create(const ATokenSource: ITokenSource); overload; 2804 constructor Create(const ATokenSource: ITokenSource; 2805 const AChannel: Integer); overload; 2806 constructor Create(const ALexer: ILexer); overload; 2807 constructor Create(const ALexer: ILexer; 2808 const AChannel: Integer); overload; 2809 2810 function ToString: String; overload; override; 2811 end; 2812 2813 { These functions return X or, if X = nil, an empty default instance } 2814 function Def(const X: IToken): IToken; overload; 2815 function Def(const X: IRuleReturnScope): IRuleReturnScope; overload; 2816 2817 implementation 2818 2819 uses 2820 StrUtils, 2821 Math, 2822 Antlr.Runtime.Tree; 2823 2824 { ERecognitionException } 2825 2826 constructor ERecognitionException.Create; 2827 begin 2828 Create('', nil); 2829 end; 2830 2831 constructor ERecognitionException.Create(const AMessage: String); 2832 begin 2833 Create(AMessage, nil); 2834 end; 2835 2836 constructor ERecognitionException.Create(const AInput: IIntStream); 2837 begin 2838 Create('', AInput); 2839 end; 2840 2841 constructor ERecognitionException.Create(const AMessage: String; 2842 const AInput: IIntStream); 2843 var 2844 TokenStream: ITokenStream; 2845 CharStream: ICharStream; 2846 begin 2847 inherited Create(AMessage); 2848 FInput := AInput; 2849 FIndex := AInput.Index; 2850 2851 if Supports(AInput, ITokenStream, TokenStream) then 2852 begin 2853 FToken := TokenStream.LT(1); 2854 FLine := FToken.Line; 2855 FCharPositionInLine := FToken.CharPositionInLine; 2856 end; 2857 2858 if Supports(AInput, ITreeNodeStream) then 2859 ExtractInformationFromTreeNodeStream(AInput) 2860 else 2861 begin 2862 if Supports(AInput, ICharStream, CharStream) then 2863 begin 2864 FC := AInput.LA(1); 2865 FLine := CharStream.Line; 2866 FCharPositionInLine := CharStream.CharPositionInLine; 2867 end 2868 else 2869 FC := AInput.LA(1); 2870 end; 2871 end; 2872 2873 procedure ERecognitionException.ExtractInformationFromTreeNodeStream( 2874 const Input: IIntStream); 2875 var 2876 Nodes: ITreeNodeStream; 2877 Adaptor: ITreeAdaptor; 2878 Payload, PriorPayload: IToken; 2879 I, NodeType: Integer; 2880 PriorNode: IANTLRInterface; 2881 Tree: ITree; 2882 Text: String; 2883 CommonTree: ICommonTree; 2884 begin 2885 Nodes := Input as ITreeNodeStream; 2886 FNode := Nodes.LT(1); 2887 Adaptor := Nodes.TreeAdaptor; 2888 Payload := Adaptor.GetToken(FNode); 2889 2890 if Assigned(Payload) then 2891 begin 2892 FToken := Payload; 2893 if (Payload.Line <= 0) then 2894 begin 2895 // imaginary node; no line/pos info; scan backwards 2896 I := -1; 2897 PriorNode := Nodes.LT(I); 2898 while Assigned(PriorNode) do 2899 begin 2900 PriorPayload := Adaptor.GetToken(PriorNode); 2901 if Assigned(PriorPayload) and (PriorPayload.Line > 0) then 2902 begin 2903 // we found the most recent real line / pos info 2904 FLine := PriorPayload.Line; 2905 
FCharPositionInLine := PriorPayload.CharPositionInLine; 2906 FApproximateLineInfo := True; 2907 Break; 2908 end; 2909 Dec(I); 2910 PriorNode := Nodes.LT(I) 2911 end; 2912 end 2913 else 2914 begin 2915 // node created from real token 2916 FLine := Payload.Line; 2917 FCharPositionInLine := Payload.CharPositionInLine; 2918 end; 2919 end else 2920 if Supports(FNode, ITree, Tree) then 2921 begin 2922 FLine := Tree.Line; 2923 FCharPositionInLine := Tree.CharPositionInLine; 2924 if Supports(FNode, ICommonTree, CommonTree) then 2925 FToken := CommonTree.Token; 2926 end 2927 else 2928 begin 2929 NodeType := Adaptor.GetNodeType(FNode); 2930 Text := Adaptor.GetNodeText(FNode); 2931 FToken := TCommonToken.Create(NodeType, Text); 2932 end; 2933 end; 2934 2935 function ERecognitionException.GetUnexpectedType: Integer; 2936 var 2937 Nodes: ITreeNodeStream; 2938 Adaptor: ITreeAdaptor; 2939 begin 2940 if Supports(FInput, ITokenStream) then 2941 Result := FToken.TokenType 2942 else 2943 if Supports(FInput, ITreeNodeStream, Nodes) then 2944 begin 2945 Adaptor := Nodes.TreeAdaptor; 2946 Result := Adaptor.GetNodeType(FNode); 2947 end else 2948 Result := FC; 2949 end; 2950 2951 { EMismatchedTokenException } 2952 2953 constructor EMismatchedTokenException.Create(const AExpecting: Integer; 2954 const AInput: IIntStream); 2955 begin 2956 inherited Create(AInput); 2957 FExpecting := AExpecting; 2958 end; 2959 2960 function EMismatchedTokenException.ToString: String; 2961 begin 2962 Result := 'MismatchedTokenException(' + IntToStr(UnexpectedType) 2963 + '!=' + IntToStr(Expecting) + ')'; 2964 2965 end; 2966 2967 { EUnwantedTokenException } 2968 2969 function EUnwantedTokenException.GetUnexpectedToken: IToken; 2970 begin 2971 Result := FToken; 2972 end; 2973 2974 function EUnwantedTokenException.ToString: String; 2975 var 2976 Exp: String; 2977 begin 2978 if (Expecting = TToken.INVALID_TOKEN_TYPE) then 2979 Exp := '' 2980 else 2981 Exp := ', expected ' + IntToStr(Expecting); 2982 if (Token = nil) then 2983 Result := 'UnwantedTokenException(found=nil' + Exp + ')' 2984 else 2985 Result := 'UnwantedTokenException(found=' + Token.Text + Exp + ')' 2986 end; 2987 2988 { EMissingTokenException } 2989 2990 constructor EMissingTokenException.Create(const AExpecting: Integer; 2991 const AInput: IIntStream; const AInserted: IANTLRInterface); 2992 begin 2993 inherited Create(AExpecting, AInput); 2994 FInserted := AInserted; 2995 end; 2996 2997 function EMissingTokenException.GetMissingType: Integer; 2998 begin 2999 Result := Expecting; 3000 end; 3001 3002 function EMissingTokenException.ToString: String; 3003 begin 3004 if Assigned(FInserted) and Assigned(FToken) then 3005 Result := 'MissingTokenException(inserted ' + FInserted.ToString 3006 + ' at ' + FToken.Text + ')' 3007 else 3008 if Assigned(FToken) then 3009 Result := 'MissingTokenException(at ' + FToken.Text + ')' 3010 else 3011 Result := 'MissingTokenException'; 3012 end; 3013 3014 { EMismatchedTreeNodeException } 3015 3016 constructor EMismatchedTreeNodeException.Create(const AExpecting: Integer; 3017 const AInput: IIntStream); 3018 begin 3019 inherited Create(AInput); 3020 FExpecting := AExpecting; 3021 end; 3022 3023 function EMismatchedTreeNodeException.ToString: String; 3024 begin 3025 Result := 'MismatchedTreeNodeException(' + IntToStr(UnexpectedType) 3026 + '!=' + IntToStr(Expecting) + ')'; 3027 end; 3028 3029 { ENoViableAltException } 3030 3031 constructor ENoViableAltException.Create( 3032 const AGrammarDecisionDescription: String; const ADecisionNumber, 3033 
AStateNumber: Integer; const AInput: IIntStream); 3034 begin 3035 inherited Create(AInput); 3036 FGrammarDecisionDescription := AGrammarDecisionDescription; 3037 FDecisionNumber := ADecisionNumber; 3038 FStateNumber := AStateNumber; 3039 end; 3040 3041 function ENoViableAltException.ToString: String; 3042 begin 3043 if Supports(Input, ICharStream) then 3044 Result := 'NoViableAltException(''' + Char(UnexpectedType) + '''@[' 3045 + FGrammarDecisionDescription + '])' 3046 else 3047 Result := 'NoViableAltException(''' + IntToStr(UnexpectedType) + '''@[' 3048 + FGrammarDecisionDescription + '])' 3049 end; 3050 3051 { EEarlyExitException } 3052 3053 constructor EEarlyExitException.Create(const ADecisionNumber: Integer; 3054 const AInput: IIntStream); 3055 begin 3056 inherited Create(AInput); 3057 FDecisionNumber := ADecisionNumber; 3058 end; 3059 3060 { EMismatchedSetException } 3061 3062 constructor EMismatchedSetException.Create(const AExpecting: IBitSet; 3063 const AInput: IIntStream); 3064 begin 3065 inherited Create(AInput); 3066 FExpecting := AExpecting; 3067 end; 3068 3069 function EMismatchedSetException.ToString: String; 3070 begin 3071 Result := 'MismatchedSetException(' + IntToStr(UnexpectedType) 3072 + '!=' + Expecting.ToString + ')'; 3073 end; 3074 3075 { EMismatchedNotSetException } 3076 3077 function EMismatchedNotSetException.ToString: String; 3078 begin 3079 Result := 'MismatchedNotSetException(' + IntToStr(UnexpectedType) 3080 + '!=' + Expecting.ToString + ')'; 3081 end; 3082 3083 { EFailedPredicateException } 3084 3085 constructor EFailedPredicateException.Create(const AInput: IIntStream; 3086 const ARuleName, APredicateText: String); 3087 begin 3088 inherited Create(AInput); 3089 FRuleName := ARuleName; 3090 FPredicateText := APredicateText; 3091 end; 3092 3093 function EFailedPredicateException.ToString: String; 3094 begin 3095 Result := 'FailedPredicateException(' + FRuleName + ',{' + FPredicateText + '}?)'; 3096 end; 3097 3098 { EMismatchedRangeException } 3099 3100 constructor EMismatchedRangeException.Create(const AA, AB: Integer; 3101 const AInput: IIntStream); 3102 begin 3103 inherited Create(AInput); 3104 FA := AA; 3105 FB := AB; 3106 end; 3107 3108 function EMismatchedRangeException.ToString: String; 3109 begin 3110 Result := 'MismatchedRangeException(' + IntToStr(UnexpectedType) 3111 + ' not in [' + IntToStr(FA)+ ',' + IntToStr(FB) + '])'; 3112 end; 3113 3114 { TCharStreamState } 3115 3116 function TCharStreamState.GetCharPositionInLine: Integer; 3117 begin 3118 Result := FCharPositionInLine; 3119 end; 3120 3121 function TCharStreamState.GetLine: Integer; 3122 begin 3123 Result := FLine; 3124 end; 3125 3126 function TCharStreamState.GetP: Integer; 3127 begin 3128 Result := FP; 3129 end; 3130 3131 procedure TCharStreamState.SetCharPositionInLine(const Value: Integer); 3132 begin 3133 FCharPositionInLine := Value; 3134 end; 3135 3136 procedure TCharStreamState.SetLine(const Value: Integer); 3137 begin 3138 FLine := Value; 3139 end; 3140 3141 procedure TCharStreamState.SetP(const Value: Integer); 3142 begin 3143 FP := Value; 3144 end; 3145 3146 { TANTLRStringStream } 3147 3148 constructor TANTLRStringStream.Create(const AInput: String); 3149 begin 3150 inherited Create; 3151 FLine := 1; 3152 FOwnsData := True; 3153 FN := Length(AInput); 3154 if (FN > 0) then 3155 begin 3156 GetMem(FData,FN * SizeOf(Char)); 3157 Move(AInput[1],FData^,FN * SizeOf(Char)); 3158 end; 3159 end; 3160 3161 procedure TANTLRStringStream.Consume; 3162 begin 3163 if (FP < FN) then 3164 begin
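// advance the column for the character at FP; a line feed (#10) bumps FLine
// and resets the column to 0; then the read pointer FP moves on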
3165 Inc(FCharPositionInLine); 3166 if (FData[FP] = #10) then 3167 begin 3168 Inc(FLine); 3169 FCharPositionInLine := 0; 3170 end; 3171 Inc(FP); 3172 end; 3173 end; 3174 3175 constructor TANTLRStringStream.Create(const AData: PChar; 3176 const ANumberOfActualCharsInArray: Integer); 3177 begin 3178 inherited Create; 3179 FLine := 1; 3180 FOwnsData := False; 3181 FData := AData; 3182 FN := ANumberOfActualCharsInArray; 3183 end; 3184 3185 constructor TANTLRStringStream.Create; 3186 begin 3187 inherited Create; 3188 FLine := 1; 3189 end; 3190 3191 destructor TANTLRStringStream.Destroy; 3192 begin 3193 if (FOwnsData) then 3194 FreeMem(FData); 3195 inherited; 3196 end; 3197 3198 function TANTLRStringStream.GetCharPositionInLine: Integer; 3199 begin 3200 Result := FCharPositionInLine; 3201 end; 3202 3203 function TANTLRStringStream.GetLine: Integer; 3204 begin 3205 Result := FLine; 3206 end; 3207 3208 function TANTLRStringStream.GetSourceName: String; 3209 begin 3210 Result := FName; 3211 end; 3212 3213 function TANTLRStringStream.Index: Integer; 3214 begin 3215 Result := FP; 3216 end; 3217 3218 function TANTLRStringStream.LA(I: Integer): Integer; 3219 begin 3220 if (I = 0) then 3221 Result := 0 // undefined 3222 else begin 3223 if (I < 0) then 3224 begin 3225 Inc(I); // e.g., translate LA(-1) to use offset i=0; then data[p+0-1] 3226 if ((FP + I - 1) < 0) then 3227 begin 3228 Result := Integer(cscEOF); 3229 Exit; 3230 end; 3231 end; 3232 3233 if ((FP + I - 1) >= FN) then 3234 Result := Integer(cscEOF) 3235 else 3236 Result := Integer(FData[FP + I - 1]); 3237 end; 3238 end; 3239 3240 function TANTLRStringStream.LAChar(I: Integer): Char; 3241 begin 3242 Result := Char(LA(I)); 3243 end; 3244 3245 function TANTLRStringStream.LT(const I: Integer): Integer; 3246 begin 3247 Result := LA(I); 3248 end; 3249 3250 function TANTLRStringStream.Mark: Integer; 3251 var 3252 State: ICharStreamState; 3253 begin 3254 if (FMarkers = nil) then 3255 begin 3256 FMarkers := TList<ICharStreamState>.Create; 3257 FMarkers.Add(nil); // depth 0 means no backtracking, leave blank 3258 end; 3259 3260 Inc(FMarkDepth); 3261 if (FMarkDepth >= FMarkers.Count) then 3262 begin 3263 State := TCharStreamState.Create; 3264 FMarkers.Add(State); 3265 end 3266 else 3267 State := FMarkers[FMarkDepth]; 3268 3269 State.P := FP; 3270 State.Line := FLine; 3271 State.CharPositionInLine := FCharPositionInLine; 3272 FLastMarker := FMarkDepth; 3273 Result := FMarkDepth; 3274 end; 3275 3276 procedure TANTLRStringStream.Release(const Marker: Integer); 3277 begin 3278 // unwind any other markers made after m and release m 3279 FMarkDepth := Marker; 3280 // release this marker 3281 Dec(FMarkDepth); 3282 end; 3283 3284 procedure TANTLRStringStream.Reset; 3285 begin 3286 FP := 0; 3287 FLine := 1; 3288 FCharPositionInLine := 0; 3289 FMarkDepth := 0; 3290 end; 3291 3292 procedure TANTLRStringStream.Rewind(const Marker: Integer); 3293 var 3294 State: ICharStreamState; 3295 begin 3296 State := FMarkers[Marker]; 3297 // restore stream state 3298 Seek(State.P); 3299 FLine := State.Line; 3300 FCharPositionInLine := State.CharPositionInLine; 3301 Release(Marker); 3302 end; 3303 3304 procedure TANTLRStringStream.Rewind; 3305 begin 3306 Rewind(FLastMarker); 3307 end; 3308 3309 procedure TANTLRStringStream.Seek(const Index: Integer); 3310 begin 3311 if (Index <= FP) then 3312 FP := Index // just jump; don't update stream state (line, ...) 
3313 else begin 3314 // seek forward, consume until p hits index 3315 while (FP < Index) do 3316 Consume; 3317 end; 3318 end; 3319 3320 procedure TANTLRStringStream.SetCharPositionInLine(const Value: Integer); 3321 begin 3322 FCharPositionInLine := Value; 3323 end; 3324 3325 procedure TANTLRStringStream.SetLine(const Value: Integer); 3326 begin 3327 FLine := Value; 3328 end; 3329 3330 function TANTLRStringStream.Size: Integer; 3331 begin 3332 Result := FN; 3333 end; 3334 3335 function TANTLRStringStream.Substring(const Start, Stop: Integer): String; 3336 begin 3337 Result := Copy(FData, Start + 1, Stop - Start + 1); 3338 end; 3339 3340 { TANTLRFileStream } 3341 3342 constructor TANTLRFileStream.Create(const AFileName: String); 3343 begin 3344 Create(AFilename,TEncoding.Default); 3345 end; 3346 3347 constructor TANTLRFileStream.Create(const AFileName: String; 3348 const AEncoding: TEncoding); 3349 begin 3350 inherited Create; 3351 FFileName := AFileName; 3352 Load(FFileName, AEncoding); 3353 end; 3354 3355 function TANTLRFileStream.GetSourceName: String; 3356 begin 3357 Result := FFileName; 3358 end; 3359 3360 procedure TANTLRFileStream.Load(const FileName: String; 3361 const Encoding: TEncoding); 3362 var 3363 FR: TStreamReader; 3364 S: String; 3365 begin 3366 if (FFileName <> '') then 3367 begin 3368 if (Encoding = nil) then 3369 FR := TStreamReader.Create(FileName,TEncoding.Default) 3370 else 3371 FR := TStreamReader.Create(FileName,Encoding); 3372 3373 try 3374 if (FOwnsData) then 3375 begin 3376 FreeMem(FData); 3377 FData := nil; 3378 end; 3379 3380 FOwnsData := True; 3381 S := FR.ReadToEnd; 3382 FN := Length(S); 3383 if (FN > 0) then 3384 begin 3385 GetMem(FData,FN * SizeOf(Char)); 3386 Move(S[1],FData^,FN * SizeOf(Char)); 3387 end; 3388 finally 3389 FR.Free; 3390 end; 3391 end; 3392 end; 3393 3394 { TBitSet } 3395 3396 class function TBitSet.BitSetOf(const El: Integer): IBitSet; 3397 begin 3398 Result := TBitSet.Create(El + 1); 3399 Result.Add(El); 3400 end; 3401 3402 class function TBitSet.BitSetOf(const A, B: Integer): IBitSet; 3403 begin 3404 Result := TBitSet.Create(Max(A,B) + 1); 3405 Result.Add(A); 3406 Result.Add(B); 3407 end; 3408 3409 class function TBitSet.BitSetOf(const A, B, C: Integer): IBitSet; 3410 begin 3411 Result := TBitSet.Create; 3412 Result.Add(A); 3413 Result.Add(B); 3414 Result.Add(C); 3415 end; 3416 3417 class function TBitSet.BitSetOf(const A, B, C, D: Integer): IBitSet; 3418 begin 3419 Result := TBitSet.Create; 3420 Result.Add(A); 3421 Result.Add(B); 3422 Result.Add(C); 3423 Result.Add(D); 3424 end; 3425 3426 procedure TBitSet.Add(const El: Integer); 3427 var 3428 N: Integer; 3429 begin 3430 N := WordNumber(El); 3431 if (N >= Length(FBits)) then 3432 GrowToInclude(El); 3433 FBits[N] := FBits[N] or BitMask(El); 3434 end; 3435 3436 class function TBitSet.BitMask(const BitNumber: Integer): UInt64; 3437 var 3438 BitPosition: Integer; 3439 begin 3440 BitPosition := BitNumber and MOD_MASK; 3441 Result := UInt64(1) shl BitPosition; 3442 end; 3443 3444 function TBitSet.BitSetOr(const A: IBitSet): IBitSet; 3445 begin 3446 Result := Clone as IBitSet; 3447 Result.OrInPlace(A); 3448 end; 3449 3450 function TBitSet.Clone: IANTLRInterface; 3451 var 3452 BS: TBitSet; 3453 begin 3454 BS := TBitSet.Create; 3455 Result := BS; 3456 SetLength(BS.FBits,Length(FBits)); 3457 if (Length(FBits) > 0) then 3458 Move(FBits[0],BS.FBits[0],Length(FBits) * SizeOf(UInt64)); 3459 end; 3460 3461 constructor TBitSet.Create; 3462 begin 3463 Create(BITS); 3464 end; 3465 3466 constructor 
TBitSet.Create(const ABits: array of UInt64); 3467 begin 3468 inherited Create; 3469 SetLength(FBits, Length(ABits)); 3470 if (Length(ABits) > 0) then 3471 Move(ABits[0], FBits[0], Length(ABits) * SizeOf(UInt64)); 3472 end; 3473 3474 constructor TBitSet.Create(const AItems: IList<Integer>); 3475 var 3476 V: Integer; 3477 begin 3478 Create(BITS); 3479 for V in AItems do 3480 Add(V); 3481 end; 3482 3483 constructor TBitSet.Create(const ANBits: Integer); 3484 begin 3485 inherited Create; 3486 SetLength(FBits,((ANBits - 1) shr LOG_BITS) + 1); 3487 end; 3488 3489 function TBitSet.Equals(Obj: TObject): Boolean; 3490 var 3491 OtherSet: TBitSet absolute Obj; 3492 I, N: Integer; 3493 begin 3494 Result := False; 3495 if (Obj = nil) or (not (Obj is TBitSet)) then 3496 Exit; 3497 3498 N := Min(Length(FBits), Length(OtherSet.FBits)); 3499 3500 // for any bits in common, compare 3501 for I := 0 to N - 1 do 3502 begin 3503 if (FBits[I] <> OtherSet.FBits[I]) then 3504 Exit; 3505 end; 3506 3507 // make sure any extra bits are off 3508 if (Length(FBits) > N) then 3509 begin 3510 for I := N + 1 to Length(FBits) - 1 do 3511 begin 3512 if (FBits[I] <> 0) then 3513 Exit; 3514 end; 3515 end 3516 else 3517 if (Length(OtherSet.FBits) > N) then 3518 begin 3519 for I := N + 1 to Length(OtherSet.FBits) - 1 do 3520 begin 3521 if (OtherSet.FBits[I] <> 0) then 3522 Exit; 3523 end; 3524 end; 3525 3526 Result := True; 3527 end; 3528 3529 function TBitSet.GetIsNil: Boolean; 3530 var 3531 I: Integer; 3532 begin 3533 for I := Length(FBits) - 1 downto 0 do 3534 if (FBits[I] <> 0) then 3535 begin 3536 Result := False; 3537 Exit; 3538 end; 3539 Result := True; 3540 end; 3541 3542 procedure TBitSet.GrowToInclude(const Bit: Integer); 3543 var 3544 NewSize: Integer; 3545 begin 3546 NewSize := Max(Length(FBits) shl 1,NumWordsToHold(Bit)); 3547 SetLength(FBits,NewSize); 3548 end; 3549 3550 function TBitSet.LengthInLongWords: Integer; 3551 begin 3552 Result := Length(FBits); 3553 end; 3554 3555 function TBitSet.Member(const El: Integer): Boolean; 3556 var 3557 N: Integer; 3558 begin 3559 if (El < 0) then 3560 Result := False 3561 else 3562 begin 3563 N := WordNumber(El); 3564 if (N >= Length(FBits)) then 3565 Result := False 3566 else 3567 Result := ((FBits[N] and BitMask(El)) <> 0); 3568 end; 3569 end; 3570 3571 function TBitSet.NumBits: Integer; 3572 begin 3573 Result := Length(FBits) shl LOG_BITS; 3574 end; 3575 3576 class function TBitSet.NumWordsToHold(const El: Integer): Integer; 3577 begin 3578 Result := (El shr LOG_BITS) + 1; 3579 end; 3580 3581 procedure TBitSet.OrInPlace(const A: IBitSet); 3582 var 3583 I, M: Integer; 3584 ABits: TUInt64Array; 3585 begin 3586 if Assigned(A) then 3587 begin 3588 // If this is smaller than a, grow this first 3589 if (A.LengthInLongWords > Length(FBits)) then 3590 SetLength(FBits,A.LengthInLongWords); 3591 M := Min(Length(FBits), A.LengthInLongWords); 3592 ABits := A.ToPackedArray; 3593 for I := M - 1 downto 0 do 3594 FBits[I] := FBits[I] or ABits[I]; 3595 end; 3596 end; 3597 3598 procedure TBitSet.Remove(const El: Integer); 3599 var 3600 N: Integer; 3601 begin 3602 N := WordNumber(El); 3603 if (N < Length(FBits)) then 3604 FBits[N] := (FBits[N] and not BitMask(El)); 3605 end; 3606 3607 function TBitSet.Size: Integer; 3608 var 3609 I, Bit: Integer; 3610 W: UInt64; 3611 begin 3612 Result := 0; 3613 for I := Length(FBits) - 1 downto 0 do 3614 begin 3615 W := FBits[I]; 3616 if (W <> 0) then 3617 begin 3618 for Bit := BITS - 1 downto 0 do 3619 begin 3620 if ((W and (UInt64(1) shl Bit)) <> 0) then 
3621 Inc(Result); 3622 end; 3623 end; 3624 end; 3625 end; 3626 3627 function TBitSet.ToArray: TIntegerArray; 3628 var 3629 I, En: Integer; 3630 begin 3631 SetLength(Result,Size); 3632 En := 0; 3633 for I := 0 to (Length(FBits) shl LOG_BITS) - 1 do 3634 begin 3635 if Member(I) then 3636 begin 3637 Result[En] := I; 3638 Inc(En); 3639 end; 3640 end; 3641 end; 3642 3643 function TBitSet.ToPackedArray: TUInt64Array; 3644 begin 3645 Result := FBits; 3646 end; 3647 3648 function TBitSet.ToString: String; 3649 begin 3650 Result := ToString(nil); 3651 end; 3652 3653 function TBitSet.ToString(const TokenNames: TStringArray): String; 3654 var 3655 Buf: TStringBuilder; 3656 I: Integer; 3657 HavePrintedAnElement: Boolean; 3658 begin 3659 HavePrintedAnElement := False; 3660 Buf := TStringBuilder.Create; 3661 try 3662 Buf.Append('{'); 3663 for I := 0 to (Length(FBits) shl LOG_BITS) - 1 do 3664 begin 3665 if Member(I) then 3666 begin 3667 if (I > 0) and HavePrintedAnElement then 3668 Buf.Append(','); 3669 if Assigned(TokenNames) then 3670 Buf.Append(TokenNames[I]) 3671 else 3672 Buf.Append(I); 3673 HavePrintedAnElement := True; 3674 end; 3675 end; 3676 Buf.Append('}'); 3677 Result := Buf.ToString; 3678 finally 3679 Buf.Free; 3680 end; 3681 end; 3682 3683 class function TBitSet.WordNumber(const Bit: Integer): Integer; 3684 begin 3685 Result := Bit shr LOG_BITS; // Bit / BITS 3686 end; 3687 3688 { TRecognizerSharedState } 3689 3690 constructor TRecognizerSharedState.Create; 3691 var 3692 I: Integer; 3693 begin 3694 inherited; 3695 SetLength(FFollowing,TBaseRecognizer.INITIAL_FOLLOW_STACK_SIZE); 3696 for I := 0 to TBaseRecognizer.INITIAL_FOLLOW_STACK_SIZE - 1 do 3697 FFollowing[I] := TBitSet.Create; 3698 FFollowingStackPointer := -1; 3699 FLastErrorIndex := -1; 3700 FTokenStartCharIndex := -1; 3701 end; 3702 3703 function TRecognizerSharedState.GetBacktracking: Integer; 3704 begin 3705 Result := FBacktracking; 3706 end; 3707 3708 function TRecognizerSharedState.GetChannel: Integer; 3709 begin 3710 Result := FChannel; 3711 end; 3712 3713 function TRecognizerSharedState.GetErrorRecovery: Boolean; 3714 begin 3715 Result := FErrorRecovery; 3716 end; 3717 3718 function TRecognizerSharedState.GetFailed: Boolean; 3719 begin 3720 Result := FFailed; 3721 end; 3722 3723 function TRecognizerSharedState.GetFollowing: TBitSetArray; 3724 begin 3725 Result := FFollowing; 3726 end; 3727 3728 function TRecognizerSharedState.GetFollowingStackPointer: Integer; 3729 begin 3730 Result := FFollowingStackPointer; 3731 end; 3732 3733 function TRecognizerSharedState.GetLastErrorIndex: Integer; 3734 begin 3735 Result := FLastErrorIndex; 3736 end; 3737 3738 function TRecognizerSharedState.GetRuleMemo: TDictionaryArray<Integer, Integer>; 3739 begin 3740 Result := FRuleMemo; 3741 end; 3742 3743 function TRecognizerSharedState.GetRuleMemoCount: Integer; 3744 begin 3745 Result := Length(FRuleMemo); 3746 end; 3747 3748 function TRecognizerSharedState.GetSyntaxErrors: Integer; 3749 begin 3750 Result := FSyntaxErrors; 3751 end; 3752 3753 function TRecognizerSharedState.GetText: String; 3754 begin 3755 Result := FText; 3756 end; 3757 3758 function TRecognizerSharedState.GetToken: IToken; 3759 begin 3760 Result := FToken; 3761 end; 3762 3763 function TRecognizerSharedState.GetTokenStartCharIndex: Integer; 3764 begin 3765 Result := FTokenStartCharIndex; 3766 end; 3767 3768 function TRecognizerSharedState.GetTokenStartCharPositionInLine: Integer; 3769 begin 3770 Result := FTokenStartCharPositionInLine; 3771 end; 3772 3773 function 
TRecognizerSharedState.GetTokenStartLine: Integer; 3774 begin 3775 Result := FTokenStartLine; 3776 end; 3777 3778 function TRecognizerSharedState.GetTokenType: Integer; 3779 begin 3780 Result := FTokenType; 3781 end; 3782 3783 procedure TRecognizerSharedState.SetBacktracking(const Value: Integer); 3784 begin 3785 FBacktracking := Value; 3786 end; 3787 3788 procedure TRecognizerSharedState.SetChannel(const Value: Integer); 3789 begin 3790 FChannel := Value; 3791 end; 3792 3793 procedure TRecognizerSharedState.SetErrorRecovery(const Value: Boolean); 3794 begin 3795 FErrorRecovery := Value; 3796 end; 3797 3798 procedure TRecognizerSharedState.SetFailed(const Value: Boolean); 3799 begin 3800 FFailed := Value; 3801 end; 3802 3803 procedure TRecognizerSharedState.SetFollowing(const Value: TBitSetArray); 3804 begin 3805 FFollowing := Value; 3806 end; 3807 3808 procedure TRecognizerSharedState.SetFollowingStackPointer(const Value: Integer); 3809 begin 3810 FFollowingStackPointer := Value; 3811 end; 3812 3813 procedure TRecognizerSharedState.SetLastErrorIndex(const Value: Integer); 3814 begin 3815 FLastErrorIndex := Value; 3816 end; 3817 3818 procedure TRecognizerSharedState.SetRuleMemoCount(const Value: Integer); 3819 begin 3820 SetLength(FRuleMemo, Value); 3821 end; 3822 3823 procedure TRecognizerSharedState.SetSyntaxErrors(const Value: Integer); 3824 begin 3825 FSyntaxErrors := Value; 3826 end; 3827 3828 procedure TRecognizerSharedState.SetText(const Value: String); 3829 begin 3830 FText := Value; 3831 end; 3832 3833 procedure TRecognizerSharedState.SetToken(const Value: IToken); 3834 begin 3835 FToken := Value; 3836 end; 3837 3838 procedure TRecognizerSharedState.SetTokenStartCharIndex(const Value: Integer); 3839 begin 3840 FTokenStartCharIndex := Value; 3841 end; 3842 3843 procedure TRecognizerSharedState.SetTokenStartCharPositionInLine( 3844 const Value: Integer); 3845 begin 3846 FTokenStartCharPositionInLine := Value; 3847 end; 3848 3849 procedure TRecognizerSharedState.SetTokenStartLine(const Value: Integer); 3850 begin 3851 FTokenStartLine := Value; 3852 end; 3853 3854 procedure TRecognizerSharedState.SetTokenType(const Value: Integer); 3855 begin 3856 FTokenType := Value; 3857 end; 3858 3859 { TCommonToken } 3860 3861 constructor TCommonToken.Create; 3862 begin 3863 inherited; 3864 FChannel := TToken.DEFAULT_CHANNEL; 3865 FCharPositionInLine := -1; 3866 FIndex := -1; 3867 end; 3868 3869 constructor TCommonToken.Create(const ATokenType: Integer); 3870 begin 3871 Create; 3872 FTokenType := ATokenType; 3873 end; 3874 3875 constructor TCommonToken.Create(const AInput: ICharStream; const ATokenType, 3876 AChannel, AStart, AStop: Integer); 3877 begin 3878 Create; 3879 FInput := AInput; 3880 FTokenType := ATokenType; 3881 FChannel := AChannel; 3882 FStart := AStart; 3883 FStop := AStop; 3884 end; 3885 3886 constructor TCommonToken.Create(const ATokenType: Integer; const AText: String); 3887 begin 3888 Create; 3889 FTokenType := ATokenType; 3890 FChannel := TToken.DEFAULT_CHANNEL; 3891 FText := AText; 3892 end; 3893 3894 function TCommonToken.GetChannel: Integer; 3895 begin 3896 Result := FChannel; 3897 end; 3898 3899 function TCommonToken.GetCharPositionInLine: Integer; 3900 begin 3901 Result := FCharPositionInLine; 3902 end; 3903 3904 function TCommonToken.GetInputStream: ICharStream; 3905 begin 3906 Result := FInput; 3907 end; 3908 3909 function TCommonToken.GetLine: Integer; 3910 begin 3911 Result := FLine; 3912 end; 3913 3914 function TCommonToken.GetStartIndex: Integer; 3915 begin 3916 
Result := FStart; 3917 end; 3918 3919 function TCommonToken.GetStopIndex: Integer; 3920 begin 3921 Result := FStop; 3922 end; 3923 3924 function TCommonToken.GetText: String; 3925 begin 3926 if (FText <> '') then 3927 Result := FText 3928 else 3929 if (FInput = nil) then 3930 Result := '' 3931 else 3932 Result := FInput.Substring(FStart, FStop); 3933 end; 3934 3935 function TCommonToken.GetTokenIndex: Integer; 3936 begin 3937 Result := FIndex; 3938 end; 3939 3940 function TCommonToken.GetTokenType: Integer; 3941 begin 3942 Result := FTokenType; 3943 end; 3944 3945 procedure TCommonToken.SetChannel(const Value: Integer); 3946 begin 3947 FChannel := Value; 3948 end; 3949 3950 procedure TCommonToken.SetCharPositionInLine(const Value: Integer); 3951 begin 3952 FCharPositionInLine := Value; 3953 end; 3954 3955 procedure TCommonToken.SetInputStream(const Value: ICharStream); 3956 begin 3957 FInput := Value; 3958 end; 3959 3960 procedure TCommonToken.SetLine(const Value: Integer); 3961 begin 3962 FLine := Value; 3963 end; 3964 3965 procedure TCommonToken.SetStartIndex(const Value: Integer); 3966 begin 3967 FStart := Value; 3968 end; 3969 3970 procedure TCommonToken.SetStopIndex(const Value: Integer); 3971 begin 3972 FStop := Value; 3973 end; 3974 3975 procedure TCommonToken.SetText(const Value: String); 3976 begin 3977 (* Override the text for this token. The property getter 3978 * will return this text rather than pulling from the buffer. 3979 * Note that this does not mean that start/stop indexes are 3980 * not valid. It means that the input was converted to a new 3981 * string in the token object. 3982 *) 3983 FText := Value; 3984 end; 3985 3986 procedure TCommonToken.SetTokenIndex(const Value: Integer); 3987 begin 3988 FIndex := Value; 3989 end; 3990 3991 procedure TCommonToken.SetTokenType(const Value: Integer); 3992 begin 3993 FTokenType := Value; 3994 end; 3995 3996 function TCommonToken.ToString: String; 3997 var 3998 ChannelStr, Txt: String; 3999 begin 4000 if (FChannel > 0) then 4001 ChannelStr := ',channel=' + IntToStr(FChannel) 4002 else 4003 ChannelStr := ''; 4004 4005 Txt := GetText; 4006 if (Txt <> '') then 4007 begin 4008 Txt := ReplaceStr(Txt,#10,'\n'); 4009 Txt := ReplaceStr(Txt,#13,'\r'); 4010 Txt := ReplaceStr(Txt,#9,'\t'); 4011 end else 4012 Txt := '<no text>'; 4013 4014 Result := Format('[@%d,%d:%d=''%s'',<%d>%s,%d:%d]', 4015 [FIndex,FStart,FStop,Txt,FTokenType,ChannelStr,FLine,FCharPositionInLine]); 4016 end; 4017 4018 constructor TCommonToken.Create(const AOldToken: IToken); 4019 var 4020 OldCommonToken: ICommonToken; 4021 begin 4022 Create; 4023 FText := AOldToken.Text; 4024 FTokenType := AOldToken.TokenType; 4025 FLine := AOldToken.Line; 4026 FIndex := AOldToken.TokenIndex; 4027 FCharPositionInLine := AOldToken.CharPositionInLine; 4028 FChannel := AOldToken.Channel; 4029 if Supports(AOldToken, ICommonToken, OldCommonToken) then 4030 begin 4031 FStart := OldCommonToken.StartIndex; 4032 FStop := OldCommonToken.StopIndex; 4033 end; 4034 end; 4035 4036 { TClassicToken } 4037 4038 constructor TClassicToken.Create(const AOldToken: IToken); 4039 begin 4040 inherited Create; 4041 FText := AOldToken.Text; 4042 FTokenType := AOldToken.TokenType; 4043 FLine := AOldToken.Line; 4044 FCharPositionInLine := AOldToken.CharPositionInLine; 4045 FChannel := AOldToken.Channel; 4046 end; 4047 4048 constructor TClassicToken.Create(const ATokenType: Integer); 4049 begin 4050 inherited Create; 4051 FTokenType := ATokenType; 4052 end; 4053 4054 constructor TClassicToken.Create(const ATokenType: 
Integer; const AText: String; 4055 const AChannel: Integer); 4056 begin 4057 inherited Create; 4058 FTokenType := ATokenType; 4059 FText := AText; 4060 FChannel := AChannel; 4061 end; 4062 4063 constructor TClassicToken.Create(const ATokenType: Integer; 4064 const AText: String); 4065 begin 4066 inherited Create; 4067 FTokenType := ATokenType; 4068 FText := AText; 4069 end; 4070 4071 function TClassicToken.GetChannel: Integer; 4072 begin 4073 Result := FChannel; 4074 end; 4075 4076 function TClassicToken.GetCharPositionInLine: Integer; 4077 begin 4078 Result := FCharPositionInLine; 4079 end; 4080 4081 function TClassicToken.GetInputStream: ICharStream; 4082 begin 4083 // No default implementation 4084 Result := nil; 4085 end; 4086 4087 function TClassicToken.GetLine: Integer; 4088 begin 4089 Result := FLine; 4090 end; 4091 4092 function TClassicToken.GetText: String; 4093 begin 4094 Result := FText; 4095 end; 4096 4097 function TClassicToken.GetTokenIndex: Integer; 4098 begin 4099 Result := FIndex; 4100 end; 4101 4102 function TClassicToken.GetTokenType: Integer; 4103 begin 4104 Result := FTokenType; 4105 end; 4106 4107 procedure TClassicToken.SetChannel(const Value: Integer); 4108 begin 4109 FChannel := Value; 4110 end; 4111 4112 procedure TClassicToken.SetCharPositionInLine(const Value: Integer); 4113 begin 4114 FCharPositionInLine := Value; 4115 end; 4116 4117 procedure TClassicToken.SetInputStream(const Value: ICharStream); 4118 begin 4119 // No default implementation 4120 end; 4121 4122 procedure TClassicToken.SetLine(const Value: Integer); 4123 begin 4124 FLine := Value; 4125 end; 4126 4127 procedure TClassicToken.SetText(const Value: String); 4128 begin 4129 FText := Value; 4130 end; 4131 4132 procedure TClassicToken.SetTokenIndex(const Value: Integer); 4133 begin 4134 FIndex := Value; 4135 end; 4136 4137 procedure TClassicToken.SetTokenType(const Value: Integer); 4138 begin 4139 FTokenType := Value; 4140 end; 4141 4142 function TClassicToken.ToString: String; 4143 var 4144 ChannelStr, Txt: String; 4145 begin 4146 if (FChannel > 0) then 4147 ChannelStr := ',channel=' + IntToStr(FChannel) 4148 else 4149 ChannelStr := ''; 4150 Txt := FText; 4151 if (Txt <> '') then 4152 begin 4153 Txt := ReplaceStr(Txt,#10,'\n'); 4154 Txt := ReplaceStr(Txt,#13,'\r'); 4155 Txt := ReplaceStr(Txt,#9,'\t'); 4156 end else 4157 Txt := '<no text>'; 4158 4159 Result := Format('[@%d,''%s'',<%d>%s,%d:%d]', 4160 [FIndex,Txt,FTokenType,ChannelStr,FLine,FCharPositionInLine]); 4161 end; 4162 4163 { TToken } 4164 4165 class procedure TToken.Initialize; 4166 begin 4167 EOF_TOKEN := TCommonToken.Create(EOF); 4168 INVALID_TOKEN := TCommonToken.Create(INVALID_TOKEN_TYPE); 4169 SKIP_TOKEN := TCommonToken.Create(INVALID_TOKEN_TYPE); 4170 end; 4171 4172 { TBaseRecognizer } 4173 4174 constructor TBaseRecognizer.Create; 4175 begin 4176 inherited; 4177 FState := TRecognizerSharedState.Create; 4178 end; 4179 4180 function TBaseRecognizer.AlreadyParsedRule(const Input: IIntStream; 4181 const RuleIndex: Integer): Boolean; 4182 var 4183 StopIndex: Integer; 4184 begin 4185 StopIndex := GetRuleMemoization(RuleIndex, Input.Index); 4186 Result := (StopIndex <> MEMO_RULE_UNKNOWN); 4187 if Result then 4188 begin 4189 if (StopIndex = MEMO_RULE_FAILED) then 4190 FState.Failed := True 4191 else 4192 Input.Seek(StopIndex + 1); // jump to one past stop token 4193 end; 4194 end; 4195 4196 procedure TBaseRecognizer.BeginBacktrack(const Level: Integer); 4197 begin 4198 // No defeault implementation 4199 end; 4200 4201 procedure 
TBaseRecognizer.BeginResync; 4202 begin 4203 // No defeault implementation 4204 end; 4205 4206 procedure TBaseRecognizer.ConsumeUntil(const Input: IIntStream; 4207 const TokenType: Integer); 4208 var 4209 TType: Integer; 4210 begin 4211 TType := Input.LA(1); 4212 while (TType <> TToken.EOF) and (TType <> TokenType) do 4213 begin 4214 Input.Consume; 4215 TType := Input.LA(1); 4216 end; 4217 end; 4218 4219 function TBaseRecognizer.CombineFollows(const Exact: Boolean): IBitSet; 4220 var 4221 I, Top: Integer; 4222 LocalFollowSet: IBitSet; 4223 begin 4224 Top := FState.FollowingStackPointer; 4225 Result := TBitSet.Create; 4226 for I := Top downto 0 do 4227 begin 4228 LocalFollowSet := FState.Following[I]; 4229 Result.OrInPlace(LocalFollowSet); 4230 if (Exact) then 4231 begin 4232 // can we see end of rule? 4233 if LocalFollowSet.Member(TToken.EOR_TOKEN_TYPE) then 4234 begin 4235 // Only leave EOR in set if at top (start rule); this lets 4236 // us know if have to include follow(start rule); i.e., EOF 4237 if (I > 0) then 4238 Result.Remove(TToken.EOR_TOKEN_TYPE); 4239 end 4240 else 4241 // can't see end of rule, quit 4242 Break; 4243 end; 4244 end; 4245 end; 4246 4247 function TBaseRecognizer.ComputeContextSensitiveRuleFOLLOW: IBitSet; 4248 begin 4249 Result := CombineFollows(True); 4250 end; 4251 4252 function TBaseRecognizer.ComputeErrorRecoverySet: IBitSet; 4253 begin 4254 Result := CombineFollows(False); 4255 end; 4256 4257 procedure TBaseRecognizer.ConsumeUntil(const Input: IIntStream; 4258 const BitSet: IBitSet); 4259 var 4260 TType: Integer; 4261 begin 4262 TType := Input.LA(1); 4263 while (TType <> TToken.EOF) and (not BitSet.Member(TType)) do 4264 begin 4265 Input.Consume; 4266 TType := Input.LA(1); 4267 end; 4268 end; 4269 4270 constructor TBaseRecognizer.Create(const AState: IRecognizerSharedState); 4271 begin 4272 if (AState = nil) then 4273 Create 4274 else 4275 begin 4276 inherited Create; 4277 FState := AState; 4278 end; 4279 end; 4280 4281 procedure TBaseRecognizer.DisplayRecognitionError( 4282 const TokenNames: TStringArray; const E: ERecognitionException); 4283 var 4284 Hdr, Msg: String; 4285 begin 4286 Hdr := GetErrorHeader(E); 4287 Msg := GetErrorMessage(E, TokenNames); 4288 EmitErrorMessage(Hdr + ' ' + Msg); 4289 end; 4290 4291 procedure TBaseRecognizer.EmitErrorMessage(const Msg: String); 4292 begin 4293 WriteLn(Msg); 4294 end; 4295 4296 procedure TBaseRecognizer.EndBacktrack(const Level: Integer; 4297 const Successful: Boolean); 4298 begin 4299 // No defeault implementation 4300 end; 4301 4302 procedure TBaseRecognizer.EndResync; 4303 begin 4304 // No defeault implementation 4305 end; 4306 4307 function TBaseRecognizer.GetBacktrackingLevel: Integer; 4308 begin 4309 Result := FState.Backtracking; 4310 end; 4311 4312 function TBaseRecognizer.GetCurrentInputSymbol( 4313 const Input: IIntStream): IANTLRInterface; 4314 begin 4315 // No defeault implementation 4316 Result := nil; 4317 end; 4318 4319 function TBaseRecognizer.GetErrorHeader(const E: ERecognitionException): String; 4320 begin 4321 Result := 'line ' + IntToStr(E.Line) + ':' + IntToStr(E.CharPositionInLine); 4322 end; 4323 4324 function TBaseRecognizer.GetErrorMessage(const E: ERecognitionException; 4325 const TokenNames: TStringArray): String; 4326 var 4327 UTE: EUnwantedTokenException absolute E; 4328 MTE: EMissingTokenException absolute E; 4329 MMTE: EMismatchedTokenException absolute E; 4330 MTNE: EMismatchedTreeNodeException absolute E; 4331 NVAE: ENoViableAltException absolute E; 4332 EEE: EEarlyExitException 
absolute E; 4333 MSE: EMismatchedSetException absolute E; 4334 MNSE: EMismatchedNotSetException absolute E; 4335 FPE: EFailedPredicateException absolute E; 4336 TokenName: String; 4337 begin 4338 Result := E.Message; 4339 if (E is EUnwantedTokenException) then 4340 begin 4341 if (UTE.Expecting = TToken.EOF) then 4342 TokenName := 'EOF' 4343 else 4344 TokenName := TokenNames[UTE.Expecting]; 4345 Result := 'extraneous input ' + GetTokenErrorDisplay(UTE.UnexpectedToken) 4346 + ' expecting ' + TokenName; 4347 end 4348 else 4349 if (E is EMissingTokenException) then 4350 begin 4351 if (MTE.Expecting = TToken.EOF) then 4352 TokenName := 'EOF' 4353 else 4354 TokenName := TokenNames[MTE.Expecting]; 4355 Result := 'missing ' + TokenName + ' at ' + GetTokenErrorDisplay(E.Token); 4356 end 4357 else 4358 if (E is EMismatchedTokenException) then 4359 begin 4360 if (MMTE.Expecting = TToken.EOF) then 4361 TokenName := 'EOF' 4362 else 4363 TokenName := TokenNames[MMTE.Expecting]; 4364 Result := 'mismatched input ' + GetTokenErrorDisplay(E.Token) 4365 + ' expecting ' + TokenName; 4366 end 4367 else 4368 if (E is EMismatchedTreeNodeException) then 4369 begin 4370 if (MTNE.Expecting = TToken.EOF) then 4371 TokenName := 'EOF' 4372 else 4373 TokenName := TokenNames[MTNE.Expecting]; 4374 // the extra nil/empty checks below mirror the C# runtime, which used a ternary to work around a .NET framework bug 4375 Result := 'mismatched tree node: '; 4376 if (MTNE.Node <> nil) and (MTNE.Node.ToString <> '') then 4377 Result := Result + MTNE.Node.ToString; 4378 Result := Result + ' expecting ' + TokenName; 4379 end 4380 else 4381 if (E is ENoViableAltException) then 4382 begin 4383 // for development, can add "decision=<<"+nvae.grammarDecisionDescription+">>" 4384 // and "(decision="+nvae.decisionNumber+") and 4385 // "state "+nvae.stateNumber 4386 Result := 'no viable alternative at input ' + GetTokenErrorDisplay(E.Token); 4387 end 4388 else 4389 if (E is EEarlyExitException) then 4390 begin 4391 // for development, can add "(decision="+eee.decisionNumber+")" 4392 Result := 'required (...)+ loop did not match anything at input ' 4393 + GetTokenErrorDisplay(E.Token); 4394 end else 4395 if (E is EMismatchedSetException) then 4396 begin 4397 Result := 'mismatched input ' + GetTokenErrorDisplay(E.Token) 4398 + ' expecting set ' + MSE.Expecting.ToString; 4399 end 4400 else 4401 if (E is EMismatchedNotSetException) then 4402 begin 4403 Result := 'mismatched input ' + GetTokenErrorDisplay(E.Token) 4404 + ' expecting set ' + MNSE.Expecting.ToString; 4405 end 4406 else 4407 if (E is EFailedPredicateException) then 4408 begin 4409 Result := 'rule ' + FPE.RuleName 4410 + ' failed predicate: {' + FPE.PredicateText + '}?'; 4411 end; 4412 end; 4413 4414 function TBaseRecognizer.GetGrammarFileName: String; 4415 begin 4416 // No default implementation 4417 Result := ''; 4418 end; 4419 4420 function TBaseRecognizer.GetMissingSymbol(const Input: IIntStream; 4421 const E: ERecognitionException; const ExpectedTokenType: Integer; 4422 const Follow: IBitSet): IANTLRInterface; 4423 begin 4424 // No default implementation 4425 Result := nil; 4426 end; 4427 4428 function TBaseRecognizer.GetNumberOfSyntaxErrors: Integer; 4429 begin 4430 Result := FState.SyntaxErrors; 4431 end; 4432 4433 function TBaseRecognizer.GetRuleMemoization(const RuleIndex, 4434 RuleStartIndex: Integer): Integer; 4435 var 4436 Dict: IDictionary<Integer, Integer>; 4437 begin 4438 Dict := FState.RuleMemo[RuleIndex]; 4439 if (Dict = nil) then 4440 begin 4441 Dict := TDictionary<Integer, Integer>.Create; 4442
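// cache the newly created map (rule start index -> memoized stop index) in the
// shared state so later lookups for this rule reuse it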
FState.RuleMemo[RuleIndex] := Dict; 4443 end; 4444 if (not Dict.TryGetValue(RuleStartIndex, Result)) then 4445 Result := MEMO_RULE_UNKNOWN; 4446 end; 4447 4448 function TBaseRecognizer.GetRuleMemoizationChaceSize: Integer; 4449 var 4450 RuleMap: IDictionary<Integer, Integer>; 4451 begin 4452 Result := 0; 4453 if Assigned(FState.RuleMemo) then 4454 begin 4455 for RuleMap in FState.RuleMemo do 4456 if Assigned(RuleMap) then 4457 Inc(Result,RuleMap.Count); // how many input indexes are recorded? 4458 end; 4459 end; 4460 4461 function TBaseRecognizer.GetState: IRecognizerSharedState; 4462 begin 4463 Result := FState; 4464 end; 4465 4466 function TBaseRecognizer.GetTokenErrorDisplay(const T: IToken): String; 4467 begin 4468 Result := T.Text; 4469 if (Result = '') then 4470 begin 4471 if (T.TokenType = TToken.EOF) then 4472 Result := '<EOF>' 4473 else 4474 Result := '<' + IntToStr(T.TokenType) + '>'; 4475 end; 4476 Result := ReplaceStr(Result,#10,'\n'); 4477 Result := ReplaceStr(Result,#13,'\r'); 4478 Result := ReplaceStr(Result,#9,'\t'); 4479 Result := '''' + Result + ''''; 4480 end; 4481 4482 function TBaseRecognizer.GetTokenNames: TStringArray; 4483 begin 4484 // no default implementation 4485 Result := nil; 4486 end; 4487 4488 function TBaseRecognizer.Match(const Input: IIntStream; 4489 const TokenType: Integer; const Follow: IBitSet): IANTLRInterface; 4490 begin 4491 Result := GetCurrentInputSymbol(Input); 4492 if (Input.LA(1) = TokenType) then 4493 begin 4494 Input.Consume; 4495 FState.ErrorRecovery := False; 4496 FState.Failed := False; 4497 end else 4498 begin 4499 if (FState.Backtracking > 0) then 4500 FState.Failed := True 4501 else 4502 begin 4503 Mismatch(Input, TokenType, Follow); 4504 Result := RecoverFromMismatchedToken(Input, TokenType, Follow); 4505 end; 4506 end; 4507 end; 4508 4509 procedure TBaseRecognizer.MatchAny(const Input: IIntStream); 4510 begin 4511 FState.ErrorRecovery := False; 4512 FState.Failed := False; 4513 Input.Consume; 4514 end; 4515 4516 procedure TBaseRecognizer.Memoize(const Input: IIntStream; const RuleIndex, 4517 RuleStartIndex: Integer); 4518 var 4519 StopTokenIndex: Integer; 4520 Dict: IDictionary<Integer, Integer>; 4521 begin 4522 Dict := FState.RuleMemo[RuleIndex]; 4523 if Assigned(Dict) then 4524 begin 4525 if FState.Failed then 4526 StopTokenIndex := MEMO_RULE_FAILED 4527 else 4528 StopTokenIndex := Input.Index - 1; 4529 Dict.AddOrSetValue(RuleStartIndex, StopTokenIndex); 4530 end; 4531 end; 4532 4533 procedure TBaseRecognizer.Mismatch(const Input: IIntStream; 4534 const TokenType: Integer; const Follow: IBitSet); 4535 begin 4536 if MismatchIsUnwantedToken(Input, TokenType) then 4537 raise EUnwantedTokenException.Create(TokenType, Input) 4538 else 4539 if MismatchIsMissingToken(Input, Follow) then 4540 raise EMissingTokenException.Create(TokenType, Input, nil) 4541 else 4542 raise EMismatchedTokenException.Create(TokenType, Input); 4543 end; 4544 4545 function TBaseRecognizer.MismatchIsMissingToken(const Input: IIntStream; 4546 const Follow: IBitSet): Boolean; 4547 var 4548 ViableTokensFollowingThisRule, Follow2: IBitSet; 4549 begin 4550 if (Follow = nil) then 4551 // we have no information about the follow; we can only consume 4552 // a single token and hope for the best 4553 Result := False 4554 else 4555 begin 4556 Follow2 := Follow; 4557 // compute what can follow this grammar element reference 4558 if (Follow.Member(TToken.EOR_TOKEN_TYPE)) then 4559 begin 4560 ViableTokensFollowingThisRule := ComputeContextSensitiveRuleFOLLOW(); 4561 Follow2 := 
Follow.BitSetOr(ViableTokensFollowingThisRule); 4562 if (FState.FollowingStackPointer >= 0) then 4563 // remove EOR if we're not the start symbol 4564 Follow2.Remove(TToken.EOR_TOKEN_TYPE); 4565 end; 4566 4567 // if current token is consistent with what could come after set 4568 // then we know we're missing a token; error recovery is free to 4569 // "insert" the missing token 4570 4571 // BitSet cannot handle negative numbers like -1 (EOF) so I leave EOR 4572 // in follow set to indicate that the fall of the start symbol is 4573 // in the set (EOF can follow). 4574 if (Follow2.Member(Input.LA(1)) or Follow2.Member(TToken.EOR_TOKEN_TYPE)) then 4575 Result := True 4576 else 4577 Result := False; 4578 end; 4579 end; 4580 4581 function TBaseRecognizer.MismatchIsUnwantedToken(const Input: IIntStream; 4582 const TokenType: Integer): Boolean; 4583 begin 4584 Result := (Input.LA(2) = TokenType); 4585 end; 4586 4587 procedure TBaseRecognizer.PushFollow(const FSet: IBitSet); 4588 var 4589 F: TBitSetArray; 4590 I: Integer; 4591 begin 4592 if ((FState.FollowingStackPointer + 1) >= Length(FState.Following)) then 4593 begin 4594 SetLength(F, Length(FState.Following) * 2); 4595 FillChar(F[0], Length(F) * SizeOf(IBitSet), 0); 4596 for I := 0 to Length(FState.Following) - 1 do 4597 F[I] := FState.Following[I]; 4598 FState.Following := F; 4599 end; 4600 FState.FollowingStackPointer := FState.FollowingStackPointer + 1; 4601 FState.Following[FState.FollowingStackPointer] := FSet; 4602 end; 4603 4604 procedure TBaseRecognizer.Recover(const Input: IIntStream; 4605 const RE: ERecognitionException); 4606 var 4607 FollowSet: IBitSet; 4608 begin 4609 if (FState.LastErrorIndex = Input.Index) then 4610 // uh oh, another error at same token index; must be a case 4611 // where LT(1) is in the recovery token set so nothing is 4612 // consumed; consume a single token so at least to prevent 4613 // an infinite loop; this is a failsafe. 
4614 Input.Consume; 4615 FState.LastErrorIndex := Input.Index; 4616 FollowSet := ComputeErrorRecoverySet; 4617 BeginResync; 4618 ConsumeUntil(Input,FollowSet); 4619 EndResync; 4620 end; 4621 4622 function TBaseRecognizer.RecoverFromMismatchedSet(const Input: IIntStream; 4623 const E: ERecognitionException; const Follow: IBitSet): IANTLRInterface; 4624 begin 4625 if MismatchIsMissingToken(Input, Follow) then 4626 begin 4627 ReportError(E); 4628 // we don't know how to conjure up a token for sets yet 4629 Result := GetMissingSymbol(Input, E, TToken.INVALID_TOKEN_TYPE, Follow); 4630 end 4631 else 4632 begin 4633 // TODO do single token deletion like above for Token mismatch 4634 Result := nil; 4635 raise E; 4636 end; 4637 end; 4638 4639 function TBaseRecognizer.RecoverFromMismatchedToken(const Input: IIntStream; 4640 const TokenType: Integer; const Follow: IBitSet): IANTLRInterface; 4641 var 4642 E: ERecognitionException; 4643 begin 4644 // if next token is what we are looking for then "delete" this token 4645 if MismatchIsUnwantedToken(Input, TokenType) then 4646 begin 4647 E := EUnwantedTokenException.Create(TokenType, Input); 4648 BeginResync; 4649 Input.Consume; // simply delete extra token 4650 EndResync; 4651 ReportError(E); // report after consuming so AW sees the token in the exception 4652 // we want to return the token we're actually matching 4653 Result := GetCurrentInputSymbol(Input); 4654 Input.Consume; // move past ttype token as if all were ok 4655 end 4656 else 4657 begin 4658 // can't recover with single token deletion, try insertion 4659 if MismatchIsMissingToken(Input, Follow) then 4660 begin 4661 E := nil; 4662 Result := GetMissingSymbol(Input, E, TokenType, Follow); 4663 E := EMissingTokenException.Create(TokenType, Input, Result); 4664 ReportError(E); // report after inserting so AW sees the token in the exception 4665 end 4666 else 4667 begin 4668 // even that didn't work; must throw the exception 4669 raise EMismatchedTokenException.Create(TokenType, Input); 4670 end; 4671 end; 4672 end; 4673 4674 procedure TBaseRecognizer.ReportError(const E: ERecognitionException); 4675 begin 4676 // if we've already reported an error and have not matched a token 4677 // yet successfully, don't report any errors. 
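// FState.ErrorRecovery is cleared again only by a successful Match/MatchAny,
// so further errors raised while still recovering are neither counted nor reported.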
4678 if (not FState.ErrorRecovery) then 4679 begin 4680 FState.SyntaxErrors := FState.SyntaxErrors + 1; // don't count spurious 4681 FState.ErrorRecovery := True; 4682 DisplayRecognitionError(GetTokenNames, E); 4683 end; 4684 end; 4685 4686 procedure TBaseRecognizer.Reset; 4687 var 4688 I: Integer; 4689 begin 4690 // wack everything related to error recovery 4691 if (FState = nil) then 4692 Exit; // no shared state work to do 4693 4694 FState.FollowingStackPointer := -1; 4695 FState.ErrorRecovery := False; 4696 FState.LastErrorIndex := -1; 4697 FState.Failed := False; 4698 FState.SyntaxErrors := 0; 4699 4700 // wack everything related to backtracking and memoization 4701 FState.Backtracking := 0; 4702 if Assigned(FState.RuleMemo) then 4703 for I := 0 to Length(FState.RuleMemo) - 1 do 4704 begin 4705 // wipe cache 4706 FState.RuleMemo[I] := nil; 4707 end; 4708 end; 4709 4710 function TBaseRecognizer.ToStrings(const Tokens: IList<IToken>): IList<String>; 4711 var 4712 Token: IToken; 4713 begin 4714 if (Tokens = nil) then 4715 Result := nil 4716 else 4717 begin 4718 Result := TList<String>.Create; 4719 for Token in Tokens do 4720 Result.Add(Token.Text); 4721 end; 4722 end; 4723 4724 procedure TBaseRecognizer.TraceIn(const RuleName: String; 4725 const RuleIndex: Integer; const InputSymbol: String); 4726 begin 4727 Write('enter ' + RuleName + ' ' + InputSymbol); 4728 if (FState.Failed) then 4729 WriteLn(' failed=True'); 4730 if (FState.Backtracking > 0) then 4731 Write(' backtracking=' + IntToStr(FState.Backtracking)); 4732 WriteLn; 4733 end; 4734 4735 procedure TBaseRecognizer.TraceOut(const RuleName: String; 4736 const RuleIndex: Integer; const InputSymbol: String); 4737 begin 4738 Write('exit ' + RuleName + ' ' + InputSymbol); 4739 if (FState.Failed) then 4740 WriteLn(' failed=True'); 4741 if (FState.Backtracking > 0) then 4742 Write(' backtracking=' + IntToStr(FState.Backtracking)); 4743 WriteLn; 4744 end; 4745 4746 { TCommonTokenStream } 4747 4748 procedure TCommonTokenStream.Consume; 4749 begin 4750 if (FP < FTokens.Count) then 4751 begin 4752 Inc(FP); 4753 FP := SkipOffTokenChannels(FP); // leave p on valid token 4754 end; 4755 end; 4756 4757 constructor TCommonTokenStream.Create; 4758 begin 4759 inherited; 4760 FP := -1; 4761 FChannel := TToken.DEFAULT_CHANNEL; 4762 FTokens := TList<IToken>.Create; 4763 FTokens.Capacity := 500; 4764 end; 4765 4766 constructor TCommonTokenStream.Create(const ATokenSource: ITokenSource); 4767 begin 4768 Create; 4769 FTokenSource := ATokenSource; 4770 end; 4771 4772 procedure TCommonTokenStream.DiscardOffChannelTokens(const Discard: Boolean); 4773 begin 4774 FDiscardOffChannelTokens := Discard; 4775 end; 4776 4777 procedure TCommonTokenStream.DiscardTokenType(const TType: Integer); 4778 begin 4779 if (FDiscardSet = nil) then 4780 FDiscardSet := THashList<Integer, Integer>.Create; 4781 FDiscardSet.Add(TType, TType); 4782 end; 4783 4784 procedure TCommonTokenStream.FillBuffer; 4785 var 4786 Index: Integer; 4787 T: IToken; 4788 Discard: Boolean; 4789 begin 4790 Index := 0; 4791 T := FTokenSource.NextToken; 4792 while Assigned(T) and (T.TokenType <> Integer(cscEOF)) do 4793 begin 4794 Discard := False; 4795 // is there a channel override for token type? 
4796 if Assigned(FChannelOverrideMap) then 4797 if FChannelOverrideMap.ContainsKey(T.TokenType) then 4798 T.Channel := FChannelOverrideMap[T.TokenType]; 4799 4800 if Assigned(FDiscardSet) and FDiscardSet.ContainsKey(T.TokenType) then 4801 Discard := True 4802 else 4803 if FDiscardOffChannelTokens and (T.Channel <> FChannel) then 4804 Discard := True; 4805 4806 if (not Discard) then 4807 begin 4808 T.TokenIndex := Index; 4809 FTokens.Add(T); 4810 Inc(Index); 4811 end; 4812 4813 T := FTokenSource.NextToken; 4814 end; 4815 // leave p pointing at first token on channel 4816 FP := 0; 4817 FP := SkipOffTokenChannels(FP); 4818 end; 4819 4820 function TCommonTokenStream.Get(const I: Integer): IToken; 4821 begin 4822 Result := FTokens[I]; 4823 end; 4824 4825 function TCommonTokenStream.GetSourceName: String; 4826 begin 4827 Result := FTokenSource.SourceName; 4828 end; 4829 4830 function TCommonTokenStream.GetTokens(const Start, Stop: Integer; 4831 const Types: IList<Integer>): IList<IToken>; 4832 begin 4833 Result := GetTokens(Start, Stop, TBitSet.Create(Types)); 4834 end; 4835 4836 function TCommonTokenStream.GetTokens(const Start, Stop, 4837 TokenType: Integer): IList<IToken>; 4838 begin 4839 Result := GetTokens(Start, Stop, TBitSet.BitSetOf(TokenType)); 4840 end; 4841 4842 function TCommonTokenStream.GetTokens(const Start, Stop: Integer; 4843 const Types: IBitSet): IList<IToken>; 4844 var 4845 I, StartIndex, StopIndex: Integer; 4846 T: IToken; 4847 begin 4848 if (FP = -1) then 4849 FillBuffer; 4850 StopIndex := Min(Stop,FTokens.Count - 1); 4851 StartIndex := Max(Start,0); 4852 if (StartIndex > StopIndex) then 4853 Result := nil 4854 else 4855 begin 4856 Result := TList<IToken>.Create; 4857 for I := StartIndex to StopIndex do 4858 begin 4859 T := FTokens[I]; 4860 if (Types = nil) or Types.Member(T.TokenType) then 4861 Result.Add(T); 4862 end; 4863 if (Result.Count = 0) then 4864 Result := nil; 4865 end; 4866 end; 4867 4868 function TCommonTokenStream.GetTokens: IList<IToken>; 4869 begin 4870 if (FP = -1) then 4871 FillBuffer; 4872 Result := FTokens; 4873 end; 4874 4875 function TCommonTokenStream.GetTokens(const Start, 4876 Stop: Integer): IList<IToken>; 4877 begin 4878 Result := GetTokens(Start, Stop, IBitSet(nil)); 4879 end; 4880 4881 function TCommonTokenStream.GetTokenSource: ITokenSource; 4882 begin 4883 Result := FTokenSource; 4884 end; 4885 4886 function TCommonTokenStream.Index: Integer; 4887 begin 4888 Result := FP; 4889 end; 4890 4891 function TCommonTokenStream.LA(I: Integer): Integer; 4892 begin 4893 Result := LT(I).TokenType; 4894 end; 4895 4896 function TCommonTokenStream.LAChar(I: Integer): Char; 4897 begin 4898 Result := Char(LA(I)); 4899 end; 4900 4901 function TCommonTokenStream.LB(const K: Integer): IToken; 4902 var 4903 I, N: Integer; 4904 begin 4905 if (FP = -1) then 4906 FillBuffer; 4907 if (K = 0) then 4908 Result := nil 4909 else 4910 if ((FP - K) < 0) then 4911 Result := nil 4912 else 4913 begin 4914 I := FP; 4915 N := 1; 4916 // find k good tokens looking backwards 4917 while (N <= K) do 4918 begin 4919 // skip off-channel tokens 4920 I := SkipOffTokenChannelsReverse(I - 1); // leave p on valid token 4921 Inc(N); 4922 end; 4923 if (I < 0) then 4924 Result := nil 4925 else 4926 Result := FTokens[I]; 4927 end; 4928 end; 4929 4930 function TCommonTokenStream.LT(const K: Integer): IToken; 4931 var 4932 I, N: Integer; 4933 begin 4934 if (FP = -1) then 4935 FillBuffer; 4936 if (K = 0) then 4937 Result := nil 4938 else 4939 if (K < 0) then 4940 Result := LB(-K) 4941 else 4942 
function TCommonTokenStream.LT(const K: Integer): IToken;
var
  I, N: Integer;
begin
  if (FP = -1) then
    FillBuffer;
  if (K = 0) then
    Result := nil
  else
    if (K < 0) then
      Result := LB(-K)
    else
      if ((FP + K - 1) >= FTokens.Count) then
        Result := TToken.EOF_TOKEN
      else
      begin
        I := FP;
        N := 1;
        // find k good tokens
        while (N < K) do
        begin
          // skip off-channel tokens
          I := SkipOffTokenChannels(I + 1); // leave p on valid token
          Inc(N);
        end;
        if (I >= FTokens.Count) then
          Result := TToken.EOF_TOKEN
        else
          Result := FTokens[I];
      end;
end;

function TCommonTokenStream.Mark: Integer;
begin
  if (FP = -1) then
    FillBuffer;
  FLastMarker := Index;
  Result := FLastMarker;
end;

procedure TCommonTokenStream.Release(const Marker: Integer);
begin
  // no resources to release
end;

procedure TCommonTokenStream.Reset;
begin
  FP := 0;
  FLastMarker := 0;
end;

procedure TCommonTokenStream.Rewind(const Marker: Integer);
begin
  Seek(Marker);
end;

procedure TCommonTokenStream.Rewind;
begin
  Seek(FLastMarker);
end;

procedure TCommonTokenStream.Seek(const Index: Integer);
begin
  FP := Index;
end;

procedure TCommonTokenStream.SetTokenSource(const Value: ITokenSource);
begin
  FTokenSource := Value;
  FTokens.Clear;
  FP := -1;
  FChannel := TToken.DEFAULT_CHANNEL;
end;

procedure TCommonTokenStream.SetTokenTypeChannel(const TType, Channel: Integer);
begin
  if (FChannelOverrideMap = nil) then
    FChannelOverrideMap := TDictionary<Integer, Integer>.Create;
  FChannelOverrideMap[TType] := Channel;
end;

function TCommonTokenStream.Size: Integer;
begin
  Result := FTokens.Count;
end;

function TCommonTokenStream.SkipOffTokenChannels(const I: Integer): Integer;
var
  N: Integer;
begin
  Result := I;
  N := FTokens.Count;
  while (Result < N) and (FTokens[Result].Channel <> FChannel) do
    Inc(Result);
end;

function TCommonTokenStream.SkipOffTokenChannelsReverse(
  const I: Integer): Integer;
begin
  Result := I;
  while (Result >= 0) and (FTokens[Result].Channel <> FChannel) do
    Dec(Result);
end;

function TCommonTokenStream.ToString: String;
begin
  if (FP = -1) then
    FillBuffer;
  Result := ToString(0, FTokens.Count - 1);
end;

function TCommonTokenStream.ToString(const Start, Stop: Integer): String;
var
  I, Finish: Integer;
  Buf: TStringBuilder;
  T: IToken;
begin
  if (Start < 0) or (Stop < 0) then
    Result := ''
  else
  begin
    if (FP = -1) then
      FillBuffer;
    if (Stop >= FTokens.Count) then
      Finish := FTokens.Count - 1
    else
      Finish := Stop;
    Buf := TStringBuilder.Create;
    try
      for I := Start to Finish do
      begin
        T := FTokens[I];
        Buf.Append(T.Text);
      end;
      Result := Buf.ToString;
    finally
      Buf.Free;
    end;
  end;
end;

function TCommonTokenStream.ToString(const Start, Stop: IToken): String;
begin
  if Assigned(Start) and Assigned(Stop) then
    Result := ToString(Start.TokenIndex, Stop.TokenIndex)
  else
    Result := '';
end;

constructor TCommonTokenStream.Create(const ATokenSource: ITokenSource;
  const AChannel: Integer);
begin
  Create(ATokenSource);
  FChannel := AChannel;
end;

constructor TCommonTokenStream.Create(const ALexer: ILexer);
begin
  Create(ALexer as ITokenSource);
end;

constructor TCommonTokenStream.Create(const ALexer: ILexer;
  const AChannel: Integer);
begin
  Create(ALexer as ITokenSource, AChannel);
end;
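(*
  Editor's note - minimal end-to-end sketch, not part of the runtime. It shows
  the lazy-fill contract of TCommonTokenStream: the constructors only record
  the token source, and the buffer is filled on the first call that needs it
  (LT, LA, GetTokens and ToString all check FP = -1). TMyLexer is a hypothetical
  grammar-generated lexer; TANTLRStringStream is this runtime's char stream.

    var
      Input: ICharStream;
      Lexer: ILexer;
      Tokens: ICommonTokenStream;
    begin
      Input := TANTLRStringStream.Create('a = b;');
      Lexer := TMyLexer.Create(Input);           // hypothetical generated lexer
      Tokens := TCommonTokenStream.Create(Lexer);
      WriteLn(Tokens.LT(1).Text);                // triggers the lazy FillBuffer
    end;
*)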
{ TDFA }

function TDFA.Description: String;
begin
  Result := 'n/a';
end;

procedure TDFA.Error(const NVAE: ENoViableAltException);
begin
  // No default implementation
end;

function TDFA.GetRecognizer: IBaseRecognizer;
begin
  Result := IBaseRecognizer(FRecognizer);
end;

function TDFA.GetSpecialStateTransitionHandler: TSpecialStateTransitionHandler;
begin
  Result := FSpecialStateTransitionHandler;
end;

procedure TDFA.NoViableAlt(const S: Integer; const Input: IIntStream);
var
  NVAE: ENoViableAltException;
begin
  if (Recognizer.State.Backtracking > 0) then
    Recognizer.State.Failed := True
  else
  begin
    NVAE := ENoViableAltException.Create(Description, FDecisionNumber, S, Input);
    Error(NVAE);
    raise NVAE;
  end;
end;

function TDFA.Predict(const Input: IIntStream): Integer;
var
  Mark, S, SNext, SpecialState: Integer;
  C: Char;
begin
  Result := 0;
  Mark := Input.Mark; // remember where decision started in input
  S := 0; // we always start at s0
  try
    while True do
    begin
      SpecialState := FSpecial[S];
      if (SpecialState >= 0) then
      begin
        S := FSpecialStateTransitionHandler(Self, SpecialState, Input);
        if (S = -1) then
        begin
          NoViableAlt(S, Input);
          Exit;
        end;
        Input.Consume;
        Continue;
      end;

      if (FAccept[S] >= 1) then
      begin
        Result := FAccept[S];
        Exit;
      end;

      // look for a normal char transition
      C := Char(Input.LA(1)); // -1 == \uFFFF, all tokens fit in 65000 space
      if (C >= FMin[S]) and (C <= FMax[S]) then
      begin
        SNext := FTransition[S, Integer(C) - Integer(FMin[S])]; // move to next state
        if (SNext < 0) then
        begin
          // was in range but not a normal transition
          // must check EOT, which is like the else clause.
          // eot[s]>=0 indicates that an EOT edge goes to another
          // state.
          if (FEOT[S] >= 0) then // EOT Transition to accept state?
          begin
            S := FEOT[S];
            Input.Consume;
            // TODO: I had this as return accept[eot[s]]
            // which assumed here that the EOT edge always
            // went to an accept...faster to do this, but
            // what about predicated edges coming from EOT
            // target?
            Continue;
          end;

          NoViableAlt(S, Input);
          Exit;
        end;
        S := SNext;
        Input.Consume;
        Continue;
      end;

      if (FEOT[S] >= 0) then
      begin
        // EOT Transition?
        S := FEOT[S];
        Input.Consume;
        Continue;
      end;

      if (C = Char(TToken.EOF)) and (FEOF[S] >= 0) then
      begin
        // EOF Transition to accept state?
        Result := FAccept[FEOF[S]];
        Exit;
      end;

      // not in range and not EOF/EOT, must be invalid symbol
      NoViableAlt(S, Input);
      Exit;
    end;
  finally
    Input.Rewind(Mark);
  end;
end;

procedure TDFA.SetRecognizer(const Value: IBaseRecognizer);
begin
  FRecognizer := Pointer(Value);
end;

procedure TDFA.SetSpecialStateTransitionHandler(
  const Value: TSpecialStateTransitionHandler);
begin
  FSpecialStateTransitionHandler := Value;
end;

function TDFA.SpecialStateTransition(const S: Integer;
  const Input: IIntStream): Integer;
begin
  // No default implementation
  Result := -1;
end;

function TDFA.SpecialTransition(const State, Symbol: Integer): Integer;
begin
  Result := 0;
end;

class function TDFA.UnpackEncodedString(
  const EncodedString: String): TSmallintArray;
var
  I, J, DI, Size: Integer;
  N, V: Char;
begin
  Size := 0;
  I := 1;
  while (I <= Length(EncodedString)) do
  begin
    Inc(Size, Integer(EncodedString[I]));
    Inc(I, 2);
  end;

  SetLength(Result, Size);
  DI := 0;
  I := 1;
  while (I <= Length(EncodedString)) do
  begin
    N := EncodedString[I];
    V := EncodedString[I + 1];
    // add v n times to data
    for J := 1 to Integer(N) do
    begin
      Result[DI] := Smallint(V);
      Inc(DI);
    end;
    Inc(I, 2);
  end;
end;

class function TDFA.UnpackEncodedStringArray(
  const EncodedStrings: array of String): TSmallintMatrix;
var
  I: Integer;
begin
  SetLength(Result, Length(EncodedStrings));
  for I := 0 to Length(EncodedStrings) - 1 do
    Result[I] := UnpackEncodedString(EncodedStrings[I]);
end;

class function TDFA.UnpackEncodedStringArray(
  const EncodedStrings: TStringArray): TSmallintMatrix;
var
  I: Integer;
begin
  SetLength(Result, Length(EncodedStrings));
  for I := 0 to Length(EncodedStrings) - 1 do
    Result[I] := UnpackEncodedString(EncodedStrings[I]);
end;

class function TDFA.UnpackEncodedStringToUnsignedChars(
  const EncodedString: String): TCharArray;
var
  I, J, DI, Size: Integer;
  N, V: Char;
begin
  Size := 0;
  I := 1;
  while (I <= Length(EncodedString)) do
  begin
    Inc(Size, Integer(EncodedString[I]));
    Inc(I, 2);
  end;

  SetLength(Result, Size);
  DI := 0;
  I := 1;
  while (I <= Length(EncodedString)) do
  begin
    N := EncodedString[I];
    V := EncodedString[I + 1];
    // add v n times to data
    for J := 1 to Integer(N) do
    begin
      Result[DI] := V;
      Inc(DI);
    end;
    Inc(I, 2);
  end;
end;
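(*
  Editor's note - worked example, not part of the runtime. The UnpackEncodedString
  routines above expand the run-length-encoded transition tables that generated
  DFAs embed as string literals: characters are read in (count, value) pairs and
  each value is repeated count times. For instance:

    TDFA.UnpackEncodedString(#3#7#2#9);
    // pairs (3,7) and (2,9)  ->  [7, 7, 7, 9, 9] as a TSmallintArray
*)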
{ TLexer }

constructor TLexer.Create;
begin
  inherited;
end;

constructor TLexer.Create(const AInput: ICharStream);
begin
  inherited Create;
  FInput := AInput;
end;

constructor TLexer.Create(const AInput: ICharStream;
  const AState: IRecognizerSharedState);
begin
  inherited Create(AState);
  FInput := AInput;
end;

function TLexer.Emit: IToken;
begin
  Result := TCommonToken.Create(FInput, FState.TokenType, FState.Channel,
    FState.TokenStartCharIndex, GetCharIndex - 1);
  Result.Line := FState.TokenStartLine;
  Result.Text := FState.Text;
  Result.CharPositionInLine := FState.TokenStartCharPositionInLine;
  Emit(Result);
end;

procedure TLexer.Emit(const Token: IToken);
begin
  FState.Token := Token;
end;

function TLexer.GetCharErrorDisplay(const C: Integer): String;
begin
  case C of
    // TToken.EOF
    TOKEN_dot_EOF:
      Result := '<EOF>';
    10:
      Result := '\n';
    9:
      Result := '\t';
    13:
      Result := '\r';
    else
      Result := Char(C);
  end;
  Result := '''' + Result + '''';
end;

function TLexer.GetCharIndex: Integer;
begin
  Result := FInput.Index;
end;

function TLexer.GetCharPositionInLine: Integer;
begin
  Result := FInput.CharPositionInLine;
end;

function TLexer.GetCharStream: ICharStream;
begin
  Result := FInput;
end;

function TLexer.GetErrorMessage(const E: ERecognitionException;
  const TokenNames: TStringArray): String;
var
  MTE: EMismatchedTokenException absolute E;
  NVAE: ENoViableAltException absolute E;
  EEE: EEarlyExitException absolute E;
  MNSE: EMismatchedNotSetException absolute E;
  MSE: EMismatchedSetException absolute E;
  MRE: EMismatchedRangeException absolute E;
begin
  if (E is EMismatchedTokenException) then
    Result := 'mismatched character ' + GetCharErrorDisplay(E.Character)
      + ' expecting ' + GetCharErrorDisplay(MTE.Expecting)
  else
  if (E is ENoViableAltException) then
    // for development, can add "decision=<<"+nvae.grammarDecisionDescription+">>"
    // and "(decision="+nvae.decisionNumber+") and
    // "state "+nvae.stateNumber
    Result := 'no viable alternative at character ' + GetCharErrorDisplay(NVAE.Character)
  else
  if (E is EEarlyExitException) then
    // for development, can add "(decision="+eee.decisionNumber+")"
    Result := 'required (...)+ loop did not match anything at character '
      + GetCharErrorDisplay(EEE.Character)
  else
  if (E is EMismatchedNotSetException) then
    Result := 'mismatched character ' + GetCharErrorDisplay(MNSE.Character)
      + ' expecting set ' + MNSE.Expecting.ToString
  else
  if (E is EMismatchedSetException) then
    Result := 'mismatched character ' + GetCharErrorDisplay(MSE.Character)
      + ' expecting set ' + MSE.Expecting.ToString
  else
  if (E is EMismatchedRangeException) then
    Result := 'mismatched character ' + GetCharErrorDisplay(MRE.Character)
      + ' expecting set ' + GetCharErrorDisplay(MRE.A) + '..'
      + GetCharErrorDisplay(MRE.B)
  else
    Result := inherited GetErrorMessage(E, TokenNames);
end;
function TLexer.GetInput: IIntStream;
begin
  Result := FInput;
end;

function TLexer.GetLine: Integer;
begin
  Result := FInput.Line;
end;

function TLexer.GetSourceName: String;
begin
  Result := FInput.SourceName;
end;

function TLexer.GetText: String;
begin
  if (FState.Text <> '') then
    Result := FState.Text
  else
    Result := FInput.Substring(FState.TokenStartCharIndex, GetCharIndex - 1);
end;

procedure TLexer.Match(const S: String);
var
  I: Integer;
  MTE: EMismatchedTokenException;
begin
  for I := 1 to Length(S) do
  begin
    if (FInput.LA(1) <> Integer(S[I])) then
    begin
      if (FState.Backtracking > 0) then
      begin
        FState.Failed := True;
        Exit;
      end;
      MTE := EMismatchedTokenException.Create(Integer(S[I]), FInput);
      Recover(MTE); // don't really recover; just consume in lexer
      raise MTE;
    end;
    FInput.Consume;
    FState.Failed := False;
  end;
end;

procedure TLexer.Match(const C: Integer);
var
  MTE: EMismatchedTokenException;
begin
  if (FInput.LA(1) <> C) then
  begin
    if (FState.Backtracking > 0) then
    begin
      FState.Failed := True;
      Exit;
    end;
    MTE := EMismatchedTokenException.Create(C, FInput);
    Recover(MTE);
    raise MTE;
  end;
  FInput.Consume;
  FState.Failed := False;
end;

procedure TLexer.MatchAny;
begin
  FInput.Consume;
end;

procedure TLexer.MatchRange(const A, B: Integer);
var
  MRE: EMismatchedRangeException;
begin
  if (FInput.LA(1) < A) or (FInput.LA(1) > B) then
  begin
    if (FState.Backtracking > 0) then
    begin
      FState.Failed := True;
      Exit;
    end;
    MRE := EMismatchedRangeException.Create(A, B, FInput);
    Recover(MRE);
    raise MRE;
  end;
  FInput.Consume;
  FState.Failed := False;
end;

function TLexer.NextToken: IToken;
begin
  while True do
  begin
    FState.Token := nil;
    FState.Channel := TToken.DEFAULT_CHANNEL;
    FState.TokenStartCharIndex := FInput.Index;
    FState.TokenStartCharPositionInLine := FInput.CharPositionInLine;
    FState.TokenStartLine := FInput.Line;
    FState.Text := '';
    if (FInput.LA(1) = Integer(cscEOF)) then
    begin
      Result := TToken.EOF_TOKEN;
      Exit;
    end;

    try
      DoTokens;
      if (FState.Token = nil) then
        Emit
      else
        if (FState.Token = TToken.SKIP_TOKEN) then
          Continue;
      Exit(FState.Token);
    except
      on NVA: ENoViableAltException do
      begin
        ReportError(NVA);
        Recover(NVA); // throw out current char and try again
      end;

      on RE: ERecognitionException do
      begin
        ReportError(RE);
        // Match() routine has already called Recover()
      end;
    end;
  end;
end;

procedure TLexer.Recover(const RE: ERecognitionException);
begin
  FInput.Consume;
end;

procedure TLexer.ReportError(const E: ERecognitionException);
begin
  DisplayRecognitionError(GetTokenNames, E);
end;
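(*
  Editor's note - behaviour sketch, not part of the runtime. NextToken above
  loops until a rule produces a real token: a generated rule body that calls
  Skip (defined further down) sets FState.Token to TToken.SKIP_TOKEN, so the
  Continue branch runs and the lexer silently moves on. A hypothetical
  whitespace rule method in a generated lexer would behave roughly like:

    procedure TMyLexer.mWS;   // hypothetical generated rule method
    begin
      Match(Ord(' '));        // consume the blank (Match is defined above)
      Skip;                   // tell NextToken not to emit a token for it
    end;
*)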
procedure TLexer.Reset;
begin
  inherited; // reset all recognizer state variables
  // wack Lexer state variables
  if Assigned(FInput) then
    FInput.Seek(0); // rewind the input
  if (FState = nil) then
    Exit; // no shared state work to do
  FState.Token := nil;
  FState.TokenType := TToken.INVALID_TOKEN_TYPE;
  FState.Channel := TToken.DEFAULT_CHANNEL;
  FState.TokenStartCharIndex := -1;
  FState.TokenStartCharPositionInLine := -1;
  FState.TokenStartLine := -1;
  FState.Text := '';
end;

procedure TLexer.SetCharStream(const Value: ICharStream);
begin
  FInput := nil;
  Reset;
  FInput := Value;
end;

procedure TLexer.SetText(const Value: String);
begin
  FState.Text := Value;
end;

procedure TLexer.Skip;
begin
  FState.Token := TToken.SKIP_TOKEN;
end;

procedure TLexer.TraceIn(const RuleName: String; const RuleIndex: Integer);
var
  InputSymbol: String;
begin
  InputSymbol := Char(FInput.LT(1)) + ' line=' + IntToStr(GetLine) + ':'
    + IntToStr(GetCharPositionInLine);
  inherited TraceIn(RuleName, RuleIndex, InputSymbol);
end;

procedure TLexer.TraceOut(const RuleName: String; const RuleIndex: Integer);
var
  InputSymbol: String;
begin
  InputSymbol := Char(FInput.LT(1)) + ' line=' + IntToStr(GetLine) + ':'
    + IntToStr(GetCharPositionInLine);
  inherited TraceOut(RuleName, RuleIndex, InputSymbol);
end;

{ TParser }

constructor TParser.Create(const AInput: ITokenStream);
begin
  inherited Create; // highlight that we go to base class to set state object
  SetTokenStream(AInput);
end;

constructor TParser.Create(const AInput: ITokenStream;
  const AState: IRecognizerSharedState);
begin
  inherited Create(AState); // share the state object with another parser
  SetTokenStream(AInput);
end;

function TParser.GetCurrentInputSymbol(
  const Input: IIntStream): IANTLRInterface;
begin
  Result := FInput.LT(1);
end;

function TParser.GetInput: IIntStream;
begin
  Result := FInput;
end;

function TParser.GetMissingSymbol(const Input: IIntStream;
  const E: ERecognitionException; const ExpectedTokenType: Integer;
  const Follow: IBitSet): IANTLRInterface;
var
  TokenText: String;
  T: ICommonToken;
  Current: IToken;
begin
  if (ExpectedTokenType = TToken.EOF) then
    TokenText := '<missing EOF>'
  else
    TokenText := '<missing ' + GetTokenNames[ExpectedTokenType] + '>';
  T := TCommonToken.Create(ExpectedTokenType, TokenText);
  Current := FInput.LT(1);
  if (Current.TokenType = TToken.EOF) then
    Current := FInput.LT(-1);
  T.Line := Current.Line;
  T.CharPositionInLine := Current.CharPositionInLine;
  T.Channel := DEFAULT_TOKEN_CHANNEL;
  Result := T;
end;

function TParser.GetSourceName: String;
begin
  Result := FInput.SourceName;
end;

function TParser.GetTokenStream: ITokenStream;
begin
  Result := FInput;
end;

procedure TParser.Reset;
begin
  inherited; // reset all recognizer state variables
  if Assigned(FInput) then
    FInput.Seek(0); // rewind the input
end;

procedure TParser.SetTokenStream(const Value: ITokenStream);
begin
  FInput := nil;
  Reset;
  FInput := Value;
end;
procedure TParser.TraceIn(const RuleName: String; const RuleIndex: Integer);
begin
  inherited TraceIn(RuleName, RuleIndex, FInput.LT(1).ToString);
end;

procedure TParser.TraceOut(const RuleName: String; const RuleIndex: Integer);
begin
  inherited TraceOut(RuleName, RuleIndex, FInput.LT(1).ToString);
end;

{ TRuleReturnScope }

function TRuleReturnScope.GetStart: IANTLRInterface;
begin
  Result := nil;
end;

function TRuleReturnScope.GetStop: IANTLRInterface;
begin
  Result := nil;
end;

function TRuleReturnScope.GetTemplate: IANTLRInterface;
begin
  Result := nil;
end;

function TRuleReturnScope.GetTree: IANTLRInterface;
begin
  Result := nil;
end;

procedure TRuleReturnScope.SetStart(const Value: IANTLRInterface);
begin
  raise EInvalidOperation.Create('Setter has not been defined for this property.');
end;

procedure TRuleReturnScope.SetStop(const Value: IANTLRInterface);
begin
  raise EInvalidOperation.Create('Setter has not been defined for this property.');
end;

procedure TRuleReturnScope.SetTree(const Value: IANTLRInterface);
begin
  raise EInvalidOperation.Create('Setter has not been defined for this property.');
end;

{ TParserRuleReturnScope }

function TParserRuleReturnScope.GetStart: IANTLRInterface;
begin
  Result := FStart;
end;

function TParserRuleReturnScope.GetStop: IANTLRInterface;
begin
  Result := FStop;
end;

procedure TParserRuleReturnScope.SetStart(const Value: IANTLRInterface);
begin
  FStart := Value as IToken;
end;

procedure TParserRuleReturnScope.SetStop(const Value: IANTLRInterface);
begin
  FStop := Value as IToken;
end;

{ TTokenRewriteStream }

procedure TTokenRewriteStream.Delete(const Start, Stop: IToken);
begin
  Delete(DEFAULT_PROGRAM_NAME, Start, Stop);
end;

procedure TTokenRewriteStream.Delete(const IndexT: IToken);
begin
  Delete(DEFAULT_PROGRAM_NAME, IndexT, IndexT);
end;

constructor TTokenRewriteStream.Create;
begin
  inherited;
  Init;
end;

constructor TTokenRewriteStream.Create(const ATokenSource: ITokenSource);
begin
  inherited Create(ATokenSource);
  Init;
end;

constructor TTokenRewriteStream.Create(const ALexer: ILexer);
begin
  Create(ALexer as ITokenSource);
end;

constructor TTokenRewriteStream.Create(const ALexer: ILexer;
  const AChannel: Integer);
begin
  Create(ALexer as ITokenSource, AChannel);
end;

function TTokenRewriteStream.CatOpText(const A, B: IANTLRInterface): IANTLRInterface;
var
  X, Y: String;
begin
  if Assigned(A) then
    X := A.ToString
  else
    X := '';

  if Assigned(B) then
    Y := B.ToString
  else
    Y := '';

  Result := TANTLRString.Create(X + Y);
end;

constructor TTokenRewriteStream.Create(const ATokenSource: ITokenSource;
  const AChannel: Integer);
begin
  inherited Create(ATokenSource, AChannel);
  Init;
end;

procedure TTokenRewriteStream.Delete(const ProgramName: String; const Start,
  Stop: IToken);
begin
  Replace(ProgramName, Start, Stop, nil);
end;

procedure TTokenRewriteStream.Delete(const ProgramName: String; const Start,
  Stop: Integer);
begin
  Replace(ProgramName, Start, Stop, nil);
end;

procedure TTokenRewriteStream.Delete(const Start, Stop: Integer);
begin
  Delete(DEFAULT_PROGRAM_NAME, Start, Stop);
end;
procedure TTokenRewriteStream.Delete(const Index: Integer);
begin
  Delete(DEFAULT_PROGRAM_NAME, Index, Index);
end;

procedure TTokenRewriteStream.DeleteProgram(const ProgramName: String);
begin
  Rollback(ProgramName, MIN_TOKEN_INDEX);
end;

procedure TTokenRewriteStream.DeleteProgram;
begin
  DeleteProgram(DEFAULT_PROGRAM_NAME);
end;

function TTokenRewriteStream.GetLastRewriteTokenIndex: Integer;
begin
  Result := GetLastRewriteTokenIndex(DEFAULT_PROGRAM_NAME);
end;

function TTokenRewriteStream.GetKindOfOps(
  const Rewrites: IList<IRewriteOperation>;
  const Kind: TGUID): IList<IRewriteOperation>;
begin
  Result := GetKindOfOps(Rewrites, Kind, Rewrites.Count);
end;

function TTokenRewriteStream.GetKindOfOps(
  const Rewrites: IList<IRewriteOperation>; const Kind: TGUID;
  const Before: Integer): IList<IRewriteOperation>;
var
  I: Integer;
  Op: IRewriteOperation;
  Obj: IInterface;
begin
  Result := TList<IRewriteOperation>.Create;
  I := 0;
  while (I < Before) and (I < Rewrites.Count) do
  begin
    Op := Rewrites[I];
    if Assigned(Op) and (Op.QueryInterface(Kind, Obj) = 0) then
      Result.Add(Op);
    Inc(I);
  end;
end;

function TTokenRewriteStream.GetLastRewriteTokenIndex(
  const ProgramName: String): Integer;
begin
  if (not FLastRewriteTokenIndexes.TryGetValue(ProgramName, Result)) then
    Result := -1;
end;

function TTokenRewriteStream.GetProgram(
  const Name: String): IList<IRewriteOperation>;
var
  InstructionStream: IList<IRewriteOperation>;
begin
  InstructionStream := FPrograms[Name];
  if (InstructionStream = nil) then
    InstructionStream := InitializeProgram(Name);
  Result := InstructionStream;
end;

procedure TTokenRewriteStream.InsertAfter(const ProgramName: String;
  const T: IToken; const Text: IANTLRInterface);
begin
  InsertAfter(ProgramName, T.TokenIndex, Text);
end;

procedure TTokenRewriteStream.Init;
var
  List: IList<IRewriteOperation>;
begin
  FPrograms := TDictionary<String, IList<IRewriteOperation>>.Create;
  List := TList<IRewriteOperation>.Create;
  List.Capacity := PROGRAM_INIT_SIZE;
  FPrograms.Add(DEFAULT_PROGRAM_NAME, List);
  FLastRewriteTokenIndexes := TDictionary<String, Integer>.Create;
end;

function TTokenRewriteStream.InitializeProgram(
  const Name: String): IList<IRewriteOperation>;
begin
  Result := TList<IRewriteOperation>.Create;
  Result.Capacity := PROGRAM_INIT_SIZE;
  FPrograms[Name] := Result;
end;

procedure TTokenRewriteStream.InsertAfter(const ProgramName: String;
  const Index: Integer; const Text: IANTLRInterface);
begin
  // to insert after, just insert before next index (even if past end)
  InsertBefore(ProgramName, Index + 1, Text);
end;

procedure TTokenRewriteStream.InsertAfter(const T: IToken;
  const Text: IANTLRInterface);
begin
  InsertAfter(DEFAULT_PROGRAM_NAME, T, Text);
end;

procedure TTokenRewriteStream.InsertAfter(const Index: Integer;
  const Text: IANTLRInterface);
begin
  InsertAfter(DEFAULT_PROGRAM_NAME, Index, Text);
end;

procedure TTokenRewriteStream.InsertBefore(const Index: Integer;
  const Text: IANTLRInterface);
begin
  InsertBefore(DEFAULT_PROGRAM_NAME, Index, Text);
end;
procedure TTokenRewriteStream.InsertBefore(const ProgramName: String;
  const T: IToken; const Text: IANTLRInterface);
begin
  InsertBefore(ProgramName, T.TokenIndex, Text);
end;

procedure TTokenRewriteStream.InsertBefore(const ProgramName: String;
  const Index: Integer; const Text: IANTLRInterface);
var
  Op: IRewriteOperation;
begin
  Op := TInsertBeforeOp.Create(Index, Text, Self);
  GetProgram(ProgramName).Add(Op);
end;

procedure TTokenRewriteStream.InsertBefore(const T: IToken;
  const Text: IANTLRInterface);
begin
  InsertBefore(DEFAULT_PROGRAM_NAME, T, Text);
end;

procedure TTokenRewriteStream.Replace(const Start, Stop: IToken;
  const Text: IANTLRInterface);
begin
  Replace(DEFAULT_PROGRAM_NAME, Start, Stop, Text);
end;

procedure TTokenRewriteStream.Replace(const IndexT: IToken;
  const Text: IANTLRInterface);
begin
  Replace(DEFAULT_PROGRAM_NAME, IndexT, IndexT, Text);
end;

procedure TTokenRewriteStream.Replace(const ProgramName: String; const Start,
  Stop: Integer; const Text: IANTLRInterface);
var
  Op: IRewriteOperation;
  Rewrites: IList<IRewriteOperation>;
begin
  if (Start > Stop) or (Start < 0) or (Stop < 0) or (Stop >= GetTokens.Count) then
    raise EArgumentOutOfRangeException.Create('replace: range invalid: '
      + IntToStr(Start) + '..' + IntToStr(Stop) + '(size='
      + IntToStr(GetTokens.Count) + ')');

  Op := TReplaceOp.Create(Start, Stop, Text, Self);
  Rewrites := GetProgram(ProgramName);
  Op.InstructionIndex := Rewrites.Count;
  Rewrites.Add(Op);
end;

function TTokenRewriteStream.ReduceToSingleOperationPerIndex(
  const Rewrites: IList<IRewriteOperation>): IDictionary<Integer, IRewriteOperation>;
var
  I, J: Integer;
  Op: IRewriteOperation;
  ROp, PrevROp: IReplaceOp;
  IOp, PrevIOp: IInsertBeforeOp;
  Inserts, PrevInserts, PrevReplaces: IList<IRewriteOperation>;
  Disjoint, Same: Boolean;
begin
  // WALK REPLACES
  for I := 0 to Rewrites.Count - 1 do
  begin
    Op := Rewrites[I];
    if (Op = nil) then
      Continue;
    if (not Supports(Op, IReplaceOp, ROp)) then
      Continue;

    // Wipe prior inserts within range
    Inserts := GetKindOfOps(Rewrites, IInsertBeforeOp, I);
    for J := 0 to Inserts.Count - 1 do
    begin
      IOp := Inserts[J] as IInsertBeforeOp;
      if (IOp.Index >= ROp.Index) and (IOp.Index <= ROp.LastIndex) then
      begin
        // delete insert as it's a no-op.
        Rewrites[IOp.InstructionIndex] := nil;
      end;
    end;

    // Drop any prior replaces contained within
    PrevReplaces := GetKindOfOps(Rewrites, IReplaceOp, I);
    for J := 0 to PrevReplaces.Count - 1 do
    begin
      PrevROp := PrevReplaces[J] as IReplaceOp;
      if (PrevROp.Index >= ROp.Index) and (PrevROp.LastIndex <= ROp.LastIndex) then
      begin
        // delete replace as it's a no-op.
        Rewrites[PrevROp.InstructionIndex] := nil;
        Continue;
      end;
      // throw exception unless disjoint or identical
      Disjoint := (PrevROp.LastIndex < ROp.Index) or (PrevROp.Index > ROp.LastIndex);
      Same := (PrevROp.Index = ROp.Index) and (PrevROp.LastIndex = ROp.LastIndex);
      if (not Disjoint) and (not Same) then
        raise EArgumentOutOfRangeException.Create('replace of boundaries of '
          + ROp.ToString + ' overlap with previous ' + PrevROp.ToString);
    end;
  end;

  // WALK INSERTS
  for I := 0 to Rewrites.Count - 1 do
  begin
    Op := Rewrites[I];
    if (Op = nil) then
      Continue;
    if (not Supports(Op, IInsertBeforeOp, IOp)) then
      Continue;

    // combine current insert with prior if any at same index
    PrevInserts := GetKindOfOps(Rewrites, IInsertBeforeOp, I);
    for J := 0 to PrevInserts.Count - 1 do
    begin
      PrevIOp := PrevInserts[J] as IInsertBeforeOp;
      if (PrevIOp.Index = IOp.Index) then
      begin
        // combine objects
        // convert to strings...we're in process of toString'ing
        // whole token buffer so no lazy eval issue with any templates
        IOp.Text := CatOpText(IOp.Text, PrevIOp.Text);
        // delete redundant prior insert
        Rewrites[PrevIOp.InstructionIndex] := nil;
      end;
    end;

    // look for replaces where iop.index is in range; error
    PrevReplaces := GetKindOfOps(Rewrites, IReplaceOp, I);
    for J := 0 to PrevReplaces.Count - 1 do
    begin
      ROp := PrevReplaces[J] as IReplaceOp;
      if (IOp.Index = ROp.Index) then
      begin
        ROp.Text := CatOpText(IOp.Text, ROp.Text);
        Rewrites[I] := nil; // delete current insert
        Continue;
      end;
      if (IOp.Index >= ROp.Index) and (IOp.Index <= ROp.LastIndex) then
        raise EArgumentOutOfRangeException.Create('insert op '
          + IOp.ToString + ' within boundaries of previous ' + ROp.ToString);
    end;
  end;

  Result := TDictionary<Integer, IRewriteOperation>.Create;
  for Op in Rewrites do
  begin
    if (Op = nil) then
      Continue; // ignore deleted ops
    if (Result.ContainsKey(Op.Index)) then
      raise Exception.Create('should only be one op per index');
    Result.Add(Op.Index, Op);
  end;
end;

procedure TTokenRewriteStream.Replace(const ProgramName: String; const Start,
  Stop: IToken; const Text: IANTLRInterface);
begin
  Replace(ProgramName, Start.TokenIndex, Stop.TokenIndex, Text);
end;

procedure TTokenRewriteStream.Replace(const Index: Integer;
  const Text: IANTLRInterface);
begin
  Replace(DEFAULT_PROGRAM_NAME, Index, Index, Text);
end;

procedure TTokenRewriteStream.Replace(const Start, Stop: Integer;
  const Text: IANTLRInterface);
begin
  Replace(DEFAULT_PROGRAM_NAME, Start, Stop, Text);
end;

procedure TTokenRewriteStream.Rollback(const InstructionIndex: Integer);
begin
  Rollback(DEFAULT_PROGRAM_NAME, InstructionIndex);
end;

procedure TTokenRewriteStream.Rollback(const ProgramName: String;
  const InstructionIndex: Integer);
var
  InstructionStream: IList<IRewriteOperation>;
begin
  InstructionStream := FPrograms[ProgramName];
  if Assigned(InstructionStream) then
    FPrograms[ProgramName] := InstructionStream.GetRange(MIN_TOKEN_INDEX,
      InstructionIndex - MIN_TOKEN_INDEX);
end;
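(*
  Editor's note - worked example, not part of the runtime.
  ReduceToSingleOperationPerIndex above collapses a rewrite program so that at
  most one operation remains per token index before ToString executes it.
  For a program built as

    Buf.InsertBefore(2, 'x');   // instruction 0 (Buf is a hypothetical stream)
    Buf.Replace(1, 3, 'y');     // instruction 1

  the replace walk sees that the earlier insert at index 2 falls inside 1..3
  and nils it out, so only the replace survives. Two inserts at the same index
  are concatenated into one, while a replace that partially overlaps an earlier,
  non-identical replace raises EArgumentOutOfRangeException.
*)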
procedure TTokenRewriteStream.SetLastRewriteTokenIndex(
  const ProgramName: String; const I: Integer);
begin
  FLastRewriteTokenIndexes[ProgramName] := I;
end;

function TTokenRewriteStream.ToDebugString: String;
begin
  Result := ToDebugString(MIN_TOKEN_INDEX, Size - 1);
end;

function TTokenRewriteStream.ToDebugString(const Start, Stop: Integer): String;
var
  Buf: TStringBuilder;
  I: Integer;
begin
  Buf := TStringBuilder.Create;
  try
    if (Start >= MIN_TOKEN_INDEX) then
      for I := Start to Min(Stop, GetTokens.Count - 1) do
        Buf.Append(Get(I).ToString);
    Result := Buf.ToString;
  finally
    Buf.Free;
  end;
end;

function TTokenRewriteStream.ToOriginalString: String;
begin
  Result := ToOriginalString(MIN_TOKEN_INDEX, Size - 1);
end;

function TTokenRewriteStream.ToOriginalString(const Start,
  Stop: Integer): String;
var
  Buf: TStringBuilder;
  I: Integer;
begin
  Buf := TStringBuilder.Create;
  try
    if (Start >= MIN_TOKEN_INDEX) then
      for I := Start to Min(Stop, GetTokens.Count - 1) do
        Buf.Append(Get(I).Text);
    Result := Buf.ToString;
  finally
    Buf.Free;
  end;
end;

function TTokenRewriteStream.ToString: String;
begin
  Result := ToString(MIN_TOKEN_INDEX, Size - 1);
end;

function TTokenRewriteStream.ToString(const ProgramName: String): String;
begin
  Result := ToString(ProgramName, MIN_TOKEN_INDEX, Size - 1);
end;

function TTokenRewriteStream.ToString(const ProgramName: String; const Start,
  Stop: Integer): String;
var
  Rewrites: IList<IRewriteOperation>;
  I, StartIndex, StopIndex: Integer;
  IndexToOp: IDictionary<Integer, IRewriteOperation>;
  Buf: TStringBuilder;
  Tokens: IList<IToken>;
  T: IToken;
  Op: IRewriteOperation;
  Pair: TPair<Integer, IRewriteOperation>;
begin
  Rewrites := FPrograms[ProgramName];
  Tokens := GetTokens;
  // ensure start/end are in range
  StopIndex := Min(Stop, Tokens.Count - 1);
  StartIndex := Max(Start, 0);

  if (Rewrites = nil) or (Rewrites.Count = 0) then
  begin
    // no instructions to execute
    Result := ToOriginalString(StartIndex, StopIndex);
    Exit;
  end;

  Buf := TStringBuilder.Create;
  try
    // First, optimize instruction stream
    IndexToOp := ReduceToSingleOperationPerIndex(Rewrites);

    // Walk buffer, executing instructions and emitting tokens
    I := StartIndex;
    while (I <= StopIndex) and (I < Tokens.Count) do
    begin
      if (not IndexToOp.TryGetValue(I, Op)) then
        Op := nil;
      IndexToOp.Remove(I); // remove so any left have index size-1
      T := Tokens[I];
      if (Op = nil) then
      begin
        // no operation at that index, just dump token
        Buf.Append(T.Text);
        Inc(I); // move to next token
      end
      else
        I := Op.Execute(Buf); // execute operation and skip
    end;

    // include stuff after end if it's last index in buffer
    // So, if they did an insertAfter(lastValidIndex, "foo"), include
    // foo if end==lastValidIndex.
    if (StopIndex = Tokens.Count - 1) then
    begin
      // Scan any remaining operations after last token
      // should be included (they will be inserts).
      for Pair in IndexToOp do
      begin
        if (Pair.Value.Index >= Tokens.Count - 1) then
          Buf.Append(Pair.Value.Text.ToString);
      end;
    end;
    Result := Buf.ToString;
  finally
    Buf.Free;
  end;
end;

function TTokenRewriteStream.ToString(const Start, Stop: Integer): String;
begin
  Result := ToString(DEFAULT_PROGRAM_NAME, Start, Stop);
end;

procedure TTokenRewriteStream.InsertBefore(const Index: Integer;
  const Text: String);
var
  S: IANTLRString;
begin
  S := TANTLRString.Create(Text);
  InsertBefore(Index, S);
end;

procedure TTokenRewriteStream.InsertBefore(const T: IToken; const Text: String);
var
  S: IANTLRString;
begin
  S := TANTLRString.Create(Text);
  InsertBefore(T, S);
end;

procedure TTokenRewriteStream.InsertBefore(const ProgramName: String;
  const Index: Integer; const Text: String);
var
  S: IANTLRString;
begin
  S := TANTLRString.Create(Text);
  InsertBefore(ProgramName, Index, S);
end;

procedure TTokenRewriteStream.InsertBefore(const ProgramName: String;
  const T: IToken; const Text: String);
var
  S: IANTLRString;
begin
  S := TANTLRString.Create(Text);
  InsertBefore(ProgramName, T, S);
end;

procedure TTokenRewriteStream.InsertAfter(const Index: Integer;
  const Text: String);
var
  S: IANTLRString;
begin
  S := TANTLRString.Create(Text);
  InsertAfter(Index, S);
end;

procedure TTokenRewriteStream.InsertAfter(const T: IToken; const Text: String);
var
  S: IANTLRString;
begin
  S := TANTLRString.Create(Text);
  InsertAfter(T, S);
end;

procedure TTokenRewriteStream.InsertAfter(const ProgramName: String;
  const Index: Integer; const Text: String);
var
  S: IANTLRString;
begin
  S := TANTLRString.Create(Text);
  InsertAfter(ProgramName, Index, S);
end;

procedure TTokenRewriteStream.InsertAfter(const ProgramName: String;
  const T: IToken; const Text: String);
var
  S: IANTLRString;
begin
  S := TANTLRString.Create(Text);
  InsertAfter(ProgramName, T, S);
end;

procedure TTokenRewriteStream.Replace(const IndexT: IToken; const Text: String);
var
  S: IANTLRString;
begin
  S := TANTLRString.Create(Text);
  Replace(IndexT, S);
end;

procedure TTokenRewriteStream.Replace(const Start, Stop: Integer;
  const Text: String);
var
  S: IANTLRString;
begin
  S := TANTLRString.Create(Text);
  Replace(Start, Stop, S);
end;

procedure TTokenRewriteStream.Replace(const Index: Integer; const Text: String);
var
  S: IANTLRString;
begin
  S := TANTLRString.Create(Text);
  Replace(Index, S);
end;

procedure TTokenRewriteStream.Replace(const ProgramName: String; const Start,
  Stop: IToken; const Text: String);
var
  S: IANTLRString;
begin
  S := TANTLRString.Create(Text);
  Replace(ProgramName, Start, Stop, S);
end;

procedure TTokenRewriteStream.Replace(const ProgramName: String; const Start,
  Stop: Integer; const Text: String);
var
  S: IANTLRString;
begin
  S := TANTLRString.Create(Text);
  Replace(ProgramName, Start, Stop, S);
end;

procedure TTokenRewriteStream.Replace(const Start, Stop: IToken;
  const Text: String);
var
  S: IANTLRString;
begin
  S := TANTLRString.Create(Text);
  Replace(Start, Stop, S);
end;
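(*
  Editor's note - usage sketch, not part of the runtime. TTokenRewriteStream is
  fed to a parser like any other token stream; the insert/replace/delete calls
  above only record instructions, and the rewritten text is produced lazily by
  ToString. TMyLexer, TMyParser, Input and the IToken variables below are
  hypothetical names for a generated grammar's artifacts.

    var
      Rewriter: ITokenRewriteStream;
    begin
      Rewriter := TTokenRewriteStream.Create(TMyLexer.Create(Input));
      // ... run TMyParser over Rewriter, remembering interesting tokens ...
      Rewriter.InsertAfter(LastImportToken, #13#10'uses NewUnit;');
      Rewriter.Replace(OldNameToken, 'NewName');
      Rewriter.Delete(ObsoleteToken);
      WriteLn(Rewriter.ToString);         // original text with the edits applied
      WriteLn(Rewriter.ToOriginalString); // untouched source text
    end;
*)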
{ TTokenRewriteStream.TRewriteOperation }

constructor TTokenRewriteStream.TRewriteOperation.Create(const AIndex: Integer;
  const AText: IANTLRInterface; const AParent: ITokenRewriteStream);
begin
  inherited Create;
  FIndex := AIndex;
  FText := AText;
  FParent := Pointer(AParent);
end;

function TTokenRewriteStream.TRewriteOperation.Execute(
  const Buf: TStringBuilder): Integer;
begin
  Result := FIndex;
end;

function TTokenRewriteStream.TRewriteOperation.GetIndex: Integer;
begin
  Result := FIndex;
end;

function TTokenRewriteStream.TRewriteOperation.GetInstructionIndex: Integer;
begin
  Result := FInstructionIndex;
end;

function TTokenRewriteStream.TRewriteOperation.GetParent: ITokenRewriteStream;
begin
  Result := ITokenRewriteStream(FParent);
end;

function TTokenRewriteStream.TRewriteOperation.GetText: IANTLRInterface;
begin
  Result := FText;
end;

procedure TTokenRewriteStream.TRewriteOperation.SetIndex(const Value: Integer);
begin
  FIndex := Value;
end;

procedure TTokenRewriteStream.TRewriteOperation.SetInstructionIndex(
  const Value: Integer);
begin
  FInstructionIndex := Value;
end;

procedure TTokenRewriteStream.TRewriteOperation.SetParent(
  const Value: ITokenRewriteStream);
begin
  FParent := Pointer(Value);
end;

procedure TTokenRewriteStream.TRewriteOperation.SetText(
  const Value: IANTLRInterface);
begin
  FText := Value;
end;

function TTokenRewriteStream.TRewriteOperation.ToString: String;
var
  OpName: String;
  DollarIndex: Integer;
begin
  OpName := ClassName;
  DollarIndex := Pos('$', OpName) - 1; // Delphi strings are 1-based
  if (DollarIndex >= 0) then
    OpName := Copy(OpName, DollarIndex + 1, Length(OpName) - (DollarIndex + 1));
  Result := '<' + OpName + '@' + IntToStr(FIndex) + ':"' + FText.ToString + '">';
end;

{ TTokenRewriteStream.TRewriteOpComparer<T> }

function TTokenRewriteStream.TRewriteOpComparer<T>.Compare(const Left,
  Right: T): Integer;
begin
  if (Left.GetIndex < Right.GetIndex) then
    Result := -1
  else
    if (Left.GetIndex > Right.GetIndex) then
      Result := 1
    else
      Result := 0;
end;

{ TTokenRewriteStream.TInsertBeforeOp }

function TTokenRewriteStream.TInsertBeforeOp.Execute(
  const Buf: TStringBuilder): Integer;
begin
  Buf.Append(Text.ToString);
  Buf.Append(Parent.Get(Index).Text);
  Result := Index + 1;
end;

{ TTokenRewriteStream.TReplaceOp }

constructor TTokenRewriteStream.TReplaceOp.Create(const AStart, AStop: Integer;
  const AText: IANTLRInterface; const AParent: ITokenRewriteStream);
begin
  inherited Create(AStart, AText, AParent);
  FLastIndex := AStop;
end;

function TTokenRewriteStream.TReplaceOp.Execute(
  const Buf: TStringBuilder): Integer;
begin
  if (Text <> nil) then
    Buf.Append(Text.ToString);
  Result := FLastIndex + 1;
end;

function TTokenRewriteStream.TReplaceOp.GetLastIndex: Integer;
begin
  Result := FLastIndex;
end;

procedure TTokenRewriteStream.TReplaceOp.SetLastIndex(const Value: Integer);
begin
  FLastIndex := Value;
end;
function TTokenRewriteStream.TReplaceOp.ToString: String;
begin
  Result := '<ReplaceOp@' + IntToStr(Index) + '..' + IntToStr(FLastIndex)
    + ':"' + Text.ToString + '">';
end;

{ TTokenRewriteStream.TDeleteOp }

function TTokenRewriteStream.TDeleteOp.ToString: String;
begin
  Result := '<DeleteOp@' + IntToStr(Index) + '..' + IntToStr(FLastIndex) + '>';
end;

{ Utilities }

var
  EmptyToken: IToken = nil;
  EmptyRuleReturnScope: IRuleReturnScope = nil;

function Def(const X: IToken): IToken; overload;
begin
  if Assigned(X) then
    Result := X
  else
  begin
    if (EmptyToken = nil) then
      EmptyToken := TCommonToken.Create;
    Result := EmptyToken;
  end;
end;

function Def(const X: IRuleReturnScope): IRuleReturnScope;
begin
  if Assigned(X) then
    Result := X
  else
  begin
    if (EmptyRuleReturnScope = nil) then
      EmptyRuleReturnScope := TRuleReturnScope.Create;
    Result := EmptyRuleReturnScope;
  end;
end;

initialization
  TToken.Initialize;

end.