# -*- coding: utf-8 -*-
"""
    jinja2.lexer
    ~~~~~~~~~~~~

    This module implements a Jinja / Python combination lexer. The
    `Lexer` class provided by this module is used to do some preprocessing
    for Jinja.

    On the one hand it filters out invalid operators like the bitshift
    operators we don't allow in templates. On the other hand it separates
    template code and python code in expressions.

    :copyright: (c) 2010 by the Jinja Team.
    :license: BSD, see LICENSE for more details.
"""
import re

from operator import itemgetter
from collections import deque
from jinja2.exceptions import TemplateSyntaxError
from jinja2.utils import LRUCache
from jinja2._compat import next, iteritems, implements_iterator, text_type, \
    intern


# cache for the lexers. Exists in order to be able to have multiple
# environments with the same lexer
_lexer_cache = LRUCache(50)

# static regular expressions
whitespace_re = re.compile(r'\s+', re.U)
string_re = re.compile(r"('([^'\\]*(?:\\.[^'\\]*)*)'"
                       r'|"([^"\\]*(?:\\.[^"\\]*)*)")', re.S)
integer_re = re.compile(r'\d+')

# we use the unicode identifier rule if this python version is able
# to handle unicode identifiers, otherwise the standard ASCII one.
try:
    compile('föö', '<unknown>', 'eval')
except SyntaxError:
    name_re = re.compile(r'\b[a-zA-Z_][a-zA-Z0-9_]*\b')
else:
    from jinja2 import _stringdefs
    name_re = re.compile(r'[%s][%s]*' % (_stringdefs.xid_start,
                                         _stringdefs.xid_continue))

float_re = re.compile(r'(?<!\.)\d+\.\d+')
newline_re = re.compile(r'(\r\n|\r|\n)')

# intern the tokens and keep references to them
TOKEN_ADD = intern('add')
TOKEN_ASSIGN = intern('assign')
TOKEN_COLON = intern('colon')
TOKEN_COMMA = intern('comma')
TOKEN_DIV = intern('div')
TOKEN_DOT = intern('dot')
TOKEN_EQ = intern('eq')
TOKEN_FLOORDIV = intern('floordiv')
TOKEN_GT = intern('gt')
TOKEN_GTEQ = intern('gteq')
TOKEN_LBRACE = intern('lbrace')
TOKEN_LBRACKET = intern('lbracket')
TOKEN_LPAREN = intern('lparen')
TOKEN_LT = intern('lt')
TOKEN_LTEQ = intern('lteq')
TOKEN_MOD = intern('mod')
TOKEN_MUL = intern('mul')
TOKEN_NE = intern('ne')
TOKEN_PIPE = intern('pipe')
TOKEN_POW = intern('pow')
TOKEN_RBRACE = intern('rbrace')
TOKEN_RBRACKET = intern('rbracket')
TOKEN_RPAREN = intern('rparen')
TOKEN_SEMICOLON = intern('semicolon')
TOKEN_SUB = intern('sub')
TOKEN_TILDE = intern('tilde')
TOKEN_WHITESPACE = intern('whitespace')
TOKEN_FLOAT = intern('float')
TOKEN_INTEGER = intern('integer')
TOKEN_NAME = intern('name')
TOKEN_STRING = intern('string')
TOKEN_OPERATOR = intern('operator')
TOKEN_BLOCK_BEGIN = intern('block_begin')
TOKEN_BLOCK_END = intern('block_end')
TOKEN_VARIABLE_BEGIN = intern('variable_begin')
TOKEN_VARIABLE_END = intern('variable_end')
TOKEN_RAW_BEGIN = intern('raw_begin')
TOKEN_RAW_END = intern('raw_end')
TOKEN_COMMENT_BEGIN = intern('comment_begin')
TOKEN_COMMENT_END = intern('comment_end')
TOKEN_COMMENT = intern('comment')
TOKEN_LINESTATEMENT_BEGIN = intern('linestatement_begin')
TOKEN_LINESTATEMENT_END = intern('linestatement_end')
TOKEN_LINECOMMENT_BEGIN = intern('linecomment_begin')
TOKEN_LINECOMMENT_END = intern('linecomment_end')
TOKEN_LINECOMMENT = intern('linecomment')
TOKEN_DATA = intern('data')
TOKEN_INITIAL = intern('initial')
TOKEN_EOF = intern('eof')

# bind operators to token types
operators = {
    '+': TOKEN_ADD,
    '-': TOKEN_SUB,
    '/': TOKEN_DIV,
    '//': TOKEN_FLOORDIV,
    '*': TOKEN_MUL,
    '%': TOKEN_MOD,
    '**': TOKEN_POW,
    '~': TOKEN_TILDE,
    '[': TOKEN_LBRACKET,
    ']': TOKEN_RBRACKET,
    '(': TOKEN_LPAREN,
    ')': TOKEN_RPAREN,
    '{': TOKEN_LBRACE,
    '}': TOKEN_RBRACE,
    '==': TOKEN_EQ,
    '!=': TOKEN_NE,
    '>': TOKEN_GT,
    '>=': TOKEN_GTEQ,
    '<': TOKEN_LT,
    '<=': TOKEN_LTEQ,
    '=': TOKEN_ASSIGN,
    '.': TOKEN_DOT,
    ':': TOKEN_COLON,
    '|': TOKEN_PIPE,
    ',': TOKEN_COMMA,
    ';': TOKEN_SEMICOLON
}

reverse_operators = dict([(v, k) for k, v in iteritems(operators)])
assert len(operators) == len(reverse_operators), 'operators dropped'
operator_re = re.compile('(%s)' % '|'.join(re.escape(x) for x in
                         sorted(operators, key=lambda x: -len(x))))

ignored_tokens = frozenset([TOKEN_COMMENT_BEGIN, TOKEN_COMMENT,
                            TOKEN_COMMENT_END, TOKEN_WHITESPACE,
                            TOKEN_LINECOMMENT_BEGIN, TOKEN_LINECOMMENT_END,
                            TOKEN_LINECOMMENT])
ignore_if_empty = frozenset([TOKEN_WHITESPACE, TOKEN_DATA,
                             TOKEN_COMMENT, TOKEN_LINECOMMENT])


def _describe_token_type(token_type):
    if token_type in reverse_operators:
        return reverse_operators[token_type]
    return {
        TOKEN_COMMENT_BEGIN: 'begin of comment',
        TOKEN_COMMENT_END: 'end of comment',
        TOKEN_COMMENT: 'comment',
        TOKEN_LINECOMMENT: 'comment',
        TOKEN_BLOCK_BEGIN: 'begin of statement block',
        TOKEN_BLOCK_END: 'end of statement block',
        TOKEN_VARIABLE_BEGIN: 'begin of print statement',
        TOKEN_VARIABLE_END: 'end of print statement',
        TOKEN_LINESTATEMENT_BEGIN: 'begin of line statement',
        TOKEN_LINESTATEMENT_END: 'end of line statement',
        TOKEN_DATA: 'template data / text',
        TOKEN_EOF: 'end of template'
    }.get(token_type, token_type)


def describe_token(token):
    """Returns a description of the token."""
    if token.type == 'name':
        return token.value
    return _describe_token_type(token.type)


def describe_token_expr(expr):
    """Like `describe_token` but for token expressions."""
    if ':' in expr:
        type, value = expr.split(':', 1)
        if type == 'name':
            return value
    else:
        type = expr
    return _describe_token_type(type)


def count_newlines(value):
    """Count the number of newline characters in the string. This is
    useful for extensions that filter a stream.
    """
    return len(newline_re.findall(value))


def compile_rules(environment):
    """Compiles all the rules from the environment into a list of rules."""
    e = re.escape
    rules = [
        (len(environment.comment_start_string), 'comment',
         e(environment.comment_start_string)),
        (len(environment.block_start_string), 'block',
         e(environment.block_start_string)),
        (len(environment.variable_start_string), 'variable',
         e(environment.variable_start_string))
    ]

    if environment.line_statement_prefix is not None:
        rules.append((len(environment.line_statement_prefix), 'linestatement',
                      r'^[ \t\v]*' + e(environment.line_statement_prefix)))
    if environment.line_comment_prefix is not None:
        rules.append((len(environment.line_comment_prefix), 'linecomment',
                      r'(?:^|(?<=\S))[^\S\r\n]*' +
                      e(environment.line_comment_prefix)))

    return [x[1:] for x in sorted(rules, reverse=True)]
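

# Illustrative sketch (not part of Jinja itself and never called by it; the
# function name is made up for demonstration): how the helpers above behave
# for the default token names defined in this module.
def _example_describe_helpers():
    # token expressions are either a plain type or 'type:value'
    assert describe_token_expr('name:for') == 'for'
    assert describe_token_expr('block_end') == 'end of statement block'
    # every \r\n, \r and \n sequence counts as exactly one newline
    assert count_newlines(u'foo\r\nbar\rbaz\n') == 3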


class Failure(object):
    """Class that raises a `TemplateSyntaxError` if called.
    Used by the `Lexer` to specify known errors.
    """

    def __init__(self, message, cls=TemplateSyntaxError):
        self.message = message
        self.error_class = cls

    def __call__(self, lineno, filename):
        raise self.error_class(self.message, lineno, filename)


class Token(tuple):
    """Token class."""
    __slots__ = ()
    lineno, type, value = (property(itemgetter(x)) for x in range(3))

    def __new__(cls, lineno, type, value):
        return tuple.__new__(cls, (lineno, intern(str(type)), value))

    def __str__(self):
        if self.type in reverse_operators:
            return reverse_operators[self.type]
        elif self.type == 'name':
            return self.value
        return self.type

    def test(self, expr):
        """Test a token against a token expression. This can either be a
        token type or ``'token_type:token_value'``. This can only test
        against string values and types.
        """
        # here we do a regular string equality check as test_any is usually
        # passed an iterable of strings that are not interned.
        if self.type == expr:
            return True
        elif ':' in expr:
            return expr.split(':', 1) == [self.type, self.value]
        return False

    def test_any(self, *iterable):
        """Test against multiple token expressions."""
        for expr in iterable:
            if self.test(expr):
                return True
        return False

    def __repr__(self):
        return 'Token(%r, %r, %r)' % (
            self.lineno,
            self.type,
            self.value
        )
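

# Illustrative sketch (not used by Jinja itself; the name is made up):
# Token is a plain tuple subclass, so instances can be built and tested
# directly with the expression syntax accepted by `Token.test`.
def _example_token_test():
    tok = Token(1, TOKEN_NAME, 'foo')
    assert tok.test('name')                    # match on type alone
    assert tok.test('name:foo')                # match on type and value
    assert tok.test_any('string', 'name:foo')  # any of several expressions
    assert str(Token(1, TOKEN_LBRACE, '{')) == '{'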


@implements_iterator
class TokenStreamIterator(object):
    """The iterator for tokenstreams. Iterate over the stream
    until the eof token is reached.
    """

    def __init__(self, stream):
        self.stream = stream

    def __iter__(self):
        return self

    def __next__(self):
        token = self.stream.current
        if token.type is TOKEN_EOF:
            self.stream.close()
            raise StopIteration()
        next(self.stream)
        return token


@implements_iterator
class TokenStream(object):
    """A token stream is an iterable that yields :class:`Token`\s. The
    parser however does not iterate over it but calls :meth:`next` to go
    one token ahead. The current active token is stored as :attr:`current`.
    """

    def __init__(self, generator, name, filename):
        self._iter = iter(generator)
        self._pushed = deque()
        self.name = name
        self.filename = filename
        self.closed = False
        self.current = Token(1, TOKEN_INITIAL, '')
        next(self)

    def __iter__(self):
        return TokenStreamIterator(self)

    def __bool__(self):
        return bool(self._pushed) or self.current.type is not TOKEN_EOF
    __nonzero__ = __bool__  # py2

    eos = property(lambda x: not x, doc="Are we at the end of the stream?")

    def push(self, token):
        """Push a token back to the stream."""
        self._pushed.append(token)

    def look(self):
        """Look at the next token."""
        old_token = next(self)
        result = self.current
        self.push(result)
        self.current = old_token
        return result

    def skip(self, n=1):
        """Go n tokens ahead."""
        for x in range(n):
            next(self)

    def next_if(self, expr):
        """Perform the token test and return the token if it matched.
        Otherwise the return value is `None`.
        """
        if self.current.test(expr):
            return next(self)

    def skip_if(self, expr):
        """Like :meth:`next_if` but only returns `True` or `False`."""
        return self.next_if(expr) is not None

    def __next__(self):
        """Go one token ahead and return the old one."""
        rv = self.current
        if self._pushed:
            self.current = self._pushed.popleft()
        elif self.current.type is not TOKEN_EOF:
            try:
                self.current = next(self._iter)
            except StopIteration:
                self.close()
        return rv

    def close(self):
        """Close the stream."""
        self.current = Token(self.current.lineno, TOKEN_EOF, '')
        self._iter = None
        self.closed = True

    def expect(self, expr):
        """Expect a given token type and return it. This accepts the same
        argument as :meth:`jinja2.lexer.Token.test`.
        """
        if not self.current.test(expr):
            expr = describe_token_expr(expr)
            if self.current.type is TOKEN_EOF:
                raise TemplateSyntaxError('unexpected end of template, '
                                          'expected %r.' % expr,
                                          self.current.lineno,
                                          self.name, self.filename)
            raise TemplateSyntaxError("expected token %r, got %r" %
                                      (expr, describe_token(self.current)),
                                      self.current.lineno,
                                      self.name, self.filename)
        try:
            return self.current
        finally:
            next(self)
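

# Illustrative sketch (not used by Jinja itself; the function name and the
# tokens are made up): the parser-facing helpers of TokenStream exercised
# on a small hand-built stream.
def _example_token_stream():
    tokens = [Token(1, TOKEN_NAME, 'foo'), Token(1, TOKEN_EOF, '')]
    stream = TokenStream(iter(tokens), '<example>', None)
    assert stream.current.value == 'foo'
    assert stream.look().type is TOKEN_EOF   # peek without consuming
    assert stream.skip_if('name')            # consumes the name token
    assert stream.eos                        # only the eof token is left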


def get_lexer(environment):
    """Return a lexer which is probably cached."""
    key = (environment.block_start_string,
           environment.block_end_string,
           environment.variable_start_string,
           environment.variable_end_string,
           environment.comment_start_string,
           environment.comment_end_string,
           environment.line_statement_prefix,
           environment.line_comment_prefix,
           environment.trim_blocks,
           environment.lstrip_blocks,
           environment.newline_sequence,
           environment.keep_trailing_newline)
    lexer = _lexer_cache.get(key)
    if lexer is None:
        lexer = Lexer(environment)
        _lexer_cache[key] = lexer
    return lexer


class Lexer(object):
    """Class that implements a lexer for a given environment. Automatically
    created by the environment class, usually you don't have to do that.

    Note that the lexer is not automatically bound to an environment.
    Multiple environments can share the same lexer.
    """

    def __init__(self, environment):
        # shortcuts
        c = lambda x: re.compile(x, re.M | re.S)
        e = re.escape

        # lexing rules for tags
        tag_rules = [
            (whitespace_re, TOKEN_WHITESPACE, None),
            (float_re, TOKEN_FLOAT, None),
            (integer_re, TOKEN_INTEGER, None),
            (name_re, TOKEN_NAME, None),
            (string_re, TOKEN_STRING, None),
            (operator_re, TOKEN_OPERATOR, None)
        ]

        # assemble the root lexing rule. because "|" is ungreedy
        # we have to sort by length so that the lexer continues working
        # as expected when we have parsing rules like <% for block and
        # <%= for variables. (if someone wants asp like syntax)
        # variables are just part of the rules if variable processing
        # is required.
        root_tag_rules = compile_rules(environment)

        # block suffix if trimming is enabled
        block_suffix_re = environment.trim_blocks and '\\n?' or ''

        # strip leading spaces if lstrip_blocks is enabled
        prefix_re = {}
        if environment.lstrip_blocks:
            # use '{%+' to manually disable lstrip_blocks behavior
            no_lstrip_re = e('+')
            # detect overlap between block and variable or comment strings
            block_diff = c(r'^%s(.*)' % e(environment.block_start_string))
            # make sure we don't mistake a block for a variable or a comment
            m = block_diff.match(environment.comment_start_string)
            no_lstrip_re += m and r'|%s' % e(m.group(1)) or ''
            m = block_diff.match(environment.variable_start_string)
            no_lstrip_re += m and r'|%s' % e(m.group(1)) or ''

            # detect overlap between comment and variable strings
            comment_diff = c(r'^%s(.*)' % e(environment.comment_start_string))
            m = comment_diff.match(environment.variable_start_string)
            no_variable_re = m and r'(?!%s)' % e(m.group(1)) or ''

            lstrip_re = r'^[ \t]*'
            block_prefix_re = r'%s%s(?!%s)|%s\+?' % (
                lstrip_re,
                e(environment.block_start_string),
                no_lstrip_re,
                e(environment.block_start_string),
            )
            comment_prefix_re = r'%s%s%s|%s\+?' % (
                lstrip_re,
                e(environment.comment_start_string),
                no_variable_re,
                e(environment.comment_start_string),
            )
            prefix_re['block'] = block_prefix_re
            prefix_re['comment'] = comment_prefix_re
        else:
            block_prefix_re = '%s' % e(environment.block_start_string)

        self.newline_sequence = environment.newline_sequence
        self.keep_trailing_newline = environment.keep_trailing_newline

        # global lexing rules
        self.rules = {
            'root': [
                # directives
                (c('(.*?)(?:%s)' % '|'.join(
                    [r'(?P<raw_begin>(?:\s*%s\-|%s)\s*raw\s*(?:\-%s\s*|%s))' % (
                        e(environment.block_start_string),
                        block_prefix_re,
                        e(environment.block_end_string),
                        e(environment.block_end_string)
                    )] + [
                        r'(?P<%s_begin>\s*%s\-|%s)' % (n, r, prefix_re.get(n, r))
                        for n, r in root_tag_rules
                    ])), (TOKEN_DATA, '#bygroup'), '#bygroup'),
                # data
                (c('.+'), TOKEN_DATA, None)
            ],
            # comments
            TOKEN_COMMENT_BEGIN: [
                (c(r'(.*?)((?:\-%s\s*|%s)%s)' % (
                    e(environment.comment_end_string),
                    e(environment.comment_end_string),
                    block_suffix_re
                )), (TOKEN_COMMENT, TOKEN_COMMENT_END), '#pop'),
                (c('(.)'), (Failure('Missing end of comment tag'),), None)
            ],
            # blocks
            TOKEN_BLOCK_BEGIN: [
                (c(r'(?:\-%s\s*|%s)%s' % (
                    e(environment.block_end_string),
                    e(environment.block_end_string),
                    block_suffix_re
                )), TOKEN_BLOCK_END, '#pop'),
            ] + tag_rules,
            # variables
            TOKEN_VARIABLE_BEGIN: [
                (c(r'\-%s\s*|%s' % (
                    e(environment.variable_end_string),
                    e(environment.variable_end_string)
                )), TOKEN_VARIABLE_END, '#pop')
            ] + tag_rules,
            # raw block
            TOKEN_RAW_BEGIN: [
                (c(r'(.*?)((?:\s*%s\-|%s)\s*endraw\s*(?:\-%s\s*|%s%s))' % (
                    e(environment.block_start_string),
                    block_prefix_re,
                    e(environment.block_end_string),
                    e(environment.block_end_string),
                    block_suffix_re
                )), (TOKEN_DATA, TOKEN_RAW_END), '#pop'),
                (c('(.)'), (Failure('Missing end of raw directive'),), None)
            ],
            # line statements
            TOKEN_LINESTATEMENT_BEGIN: [
                (c(r'\s*(\n|$)'), TOKEN_LINESTATEMENT_END, '#pop')
            ] + tag_rules,
            # line comments
            TOKEN_LINECOMMENT_BEGIN: [
                (c(r'(.*?)()(?=\n|$)'), (TOKEN_LINECOMMENT,
                                         TOKEN_LINECOMMENT_END), '#pop')
            ]
        }

    def _normalize_newlines(self, value):
        """Called for strings and template data to normalize it to unicode."""
        return newline_re.sub(self.newline_sequence, value)

    def tokenize(self, source, name=None, filename=None, state=None):
        """Calls tokeniter + wrap and wraps it in a token stream."""
        stream = self.tokeniter(source, name, filename, state)
        return TokenStream(self.wrap(stream, name, filename), name, filename)

    def wrap(self, stream, name=None, filename=None):
        """This is called with the stream as returned by `tokeniter` and wraps
        every token in a :class:`Token` and converts the value.
        """
        for lineno, token, value in stream:
            if token in ignored_tokens:
                continue
            elif token == 'linestatement_begin':
                token = 'block_begin'
            elif token == 'linestatement_end':
                token = 'block_end'
            # we are not interested in those tokens in the parser
            elif token in ('raw_begin', 'raw_end'):
                continue
            elif token == 'data':
                value = self._normalize_newlines(value)
            elif token == 'keyword':
                token = value
            elif token == 'name':
                value = str(value)
            elif token == 'string':
                # try to unescape string
                try:
                    value = self._normalize_newlines(value[1:-1]) \
                        .encode('ascii', 'backslashreplace') \
                        .decode('unicode-escape')
                except Exception as e:
                    msg = str(e).split(':')[-1].strip()
                    raise TemplateSyntaxError(msg, lineno, name, filename)
                # if we can express it as bytestring (ascii only)
                # we do that for support of semi broken APIs
                # such as datetime.datetime.strftime. On python 3 this
                # call becomes a noop thanks to 2to3
                try:
                    value = str(value)
                except UnicodeError:
                    pass
            elif token == 'integer':
                value = int(value)
            elif token == 'float':
                value = float(value)
            elif token == 'operator':
                token = operators[value]
            yield Token(lineno, token, value)

    def tokeniter(self, source, name, filename=None, state=None):
        """This method tokenizes the text and returns the tokens in a
        generator. Use this method if you just want to tokenize a template.
        """
        source = text_type(source)
        lines = source.splitlines()
        if self.keep_trailing_newline and source:
            for newline in ('\r\n', '\r', '\n'):
                if source.endswith(newline):
                    lines.append('')
                    break
        source = '\n'.join(lines)
        pos = 0
        lineno = 1
        stack = ['root']
        if state is not None and state != 'root':
            assert state in ('variable', 'block'), 'invalid state'
            stack.append(state + '_begin')
        else:
            state = 'root'
        statetokens = self.rules[stack[-1]]
        source_length = len(source)

        balancing_stack = []

        while 1:
            # tokenizer loop
            for regex, tokens, new_state in statetokens:
                m = regex.match(source, pos)
                # if no match we try again with the next rule
                if m is None:
                    continue

                # we only match blocks and variables if braces / parentheses
                # are balanced. continue parsing with the lower rule which
                # is the operator rule. do this only if the end tags look
                # like operators
                if balancing_stack and \
                   tokens in ('variable_end', 'block_end',
                              'linestatement_end'):
                    continue

                # tuples support more options
                if isinstance(tokens, tuple):
                    for idx, token in enumerate(tokens):
                        # failure group
                        if token.__class__ is Failure:
                            raise token(lineno, filename)
                        # bygroup is a bit more complex, in that case we
                        # yield for the current token the first named
                        # group that matched
                        elif token == '#bygroup':
                            for key, value in iteritems(m.groupdict()):
                                if value is not None:
                                    yield lineno, key, value
                                    lineno += value.count('\n')
                                    break
                            else:
                                raise RuntimeError('%r wanted to resolve '
                                                   'the token dynamically'
                                                   ' but no group matched'
                                                   % regex)
                        # normal group
                        else:
                            data = m.group(idx + 1)
                            if data or token not in ignore_if_empty:
                                yield lineno, token, data
                            lineno += data.count('\n')

                # strings as token are just yielded as is
                else:
                    data = m.group()
                    # update brace/parentheses balance
                    if tokens == 'operator':
                        if data == '{':
                            balancing_stack.append('}')
                        elif data == '(':
                            balancing_stack.append(')')
                        elif data == '[':
                            balancing_stack.append(']')
                        elif data in ('}', ')', ']'):
                            if not balancing_stack:
                                raise TemplateSyntaxError('unexpected \'%s\'' %
                                                          data, lineno, name,
                                                          filename)
                            expected_op = balancing_stack.pop()
                            if expected_op != data:
                                raise TemplateSyntaxError('unexpected \'%s\', '
                                                          'expected \'%s\'' %
                                                          (data, expected_op),
                                                          lineno, name,
                                                          filename)
                    # yield items
                    if data or tokens not in ignore_if_empty:
                        yield lineno, tokens, data
                    lineno += data.count('\n')

                # fetch new position into new variable so that we can check
                # if there is an internal parsing error which would result
                # in an infinite loop
                pos2 = m.end()

                # handle state changes
                if new_state is not None:
                    # remove the uppermost state
                    if new_state == '#pop':
                        stack.pop()
                    # resolve the new state by group checking
                    elif new_state == '#bygroup':
                        for key, value in iteritems(m.groupdict()):
                            if value is not None:
                                stack.append(key)
                                break
                        else:
                            raise RuntimeError('%r wanted to resolve the '
                                               'new state dynamically but'
                                               ' no group matched' %
                                               regex)
                    # direct state name given
                    else:
                        stack.append(new_state)
                    statetokens = self.rules[stack[-1]]
                # we are still at the same position and no stack change.
                # this means a loop without break condition, avoid that and
                # raise error
                elif pos2 == pos:
                    raise RuntimeError('%r yielded empty string without '
                                       'stack change' % regex)
                # publish new position and start again
                pos = pos2
                break
            # if loop terminated without break we haven't found a single match
            # either we are at the end of the file or we have a problem
            else:
                # end of text
                if pos >= source_length:
                    return
                # something went wrong
                raise TemplateSyntaxError('unexpected char %r at %d' %
                                          (source[pos], pos), lineno,
                                          name, filename)
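

# Illustrative usage sketch (not part of Jinja itself; the function name is
# made up and nothing here runs on import): tokenizing a small template with
# an Environment that uses the default delimiters. The Environment import is
# done lazily inside the function to avoid a circular import at module load.
def _example_tokenize():
    from jinja2.environment import Environment
    lexer = get_lexer(Environment())
    stream = lexer.tokenize(u'Hello {{ name }}!')
    for token in stream:
        # yields data, variable_begin, name, variable_end and data tokens;
        # iteration stops once the eof token becomes the current token.
        print('%s %s %r' % (token.lineno, token.type, token.value))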