# -*- coding: utf-8 -*-
"""
    jinja2.lexer
    ~~~~~~~~~~~~

    This module implements a Jinja / Python combination lexer. The
    `Lexer` class provided by this module is used to do some preprocessing
    for Jinja.

    On the one hand it filters out invalid operators like the bitshift
    operators we don't allow in templates. On the other hand it separates
    template code and Python code in expressions.

    :copyright: (c) 2010 by the Jinja Team.
    :license: BSD, see LICENSE for more details.
"""
import re
from operator import itemgetter
from collections import deque
from jinja2.exceptions import TemplateSyntaxError
from jinja2.utils import LRUCache, next


# cache for the lexers. Exists in order to be able to have multiple
# environments with the same lexer
_lexer_cache = LRUCache(50)

# static regular expressions
whitespace_re = re.compile(r'\s+', re.U)
string_re = re.compile(r"('([^'\\]*(?:\\.[^'\\]*)*)'"
                       r'|"([^"\\]*(?:\\.[^"\\]*)*)")', re.S)
integer_re = re.compile(r'\d+')

# we use the unicode identifier rule if this Python version is able
# to handle unicode identifiers, otherwise the standard ASCII one.
try:
    compile('föö', '<unknown>', 'eval')
except SyntaxError:
    name_re = re.compile(r'\b[a-zA-Z_][a-zA-Z0-9_]*\b')
else:
    from jinja2 import _stringdefs
    name_re = re.compile(r'[%s][%s]*' % (_stringdefs.xid_start,
                                         _stringdefs.xid_continue))

float_re = re.compile(r'(?<!\.)\d+\.\d+')
newline_re = re.compile(r'(\r\n|\r|\n)')

# intern the token type strings and keep references to them
TOKEN_ADD = intern('add')
TOKEN_ASSIGN = intern('assign')
TOKEN_COLON = intern('colon')
TOKEN_COMMA = intern('comma')
TOKEN_DIV = intern('div')
TOKEN_DOT = intern('dot')
TOKEN_EQ = intern('eq')
TOKEN_FLOORDIV = intern('floordiv')
TOKEN_GT = intern('gt')
TOKEN_GTEQ = intern('gteq')
TOKEN_LBRACE = intern('lbrace')
TOKEN_LBRACKET = intern('lbracket')
TOKEN_LPAREN = intern('lparen')
TOKEN_LT = intern('lt')
TOKEN_LTEQ = intern('lteq')
TOKEN_MOD = intern('mod')
TOKEN_MUL = intern('mul')
TOKEN_NE = intern('ne')
TOKEN_PIPE = intern('pipe')
TOKEN_POW = intern('pow')
TOKEN_RBRACE = intern('rbrace')
TOKEN_RBRACKET = intern('rbracket')
TOKEN_RPAREN = intern('rparen')
TOKEN_SEMICOLON = intern('semicolon')
TOKEN_SUB = intern('sub')
TOKEN_TILDE = intern('tilde')
TOKEN_WHITESPACE = intern('whitespace')
TOKEN_FLOAT = intern('float')
TOKEN_INTEGER = intern('integer')
TOKEN_NAME = intern('name')
TOKEN_STRING = intern('string')
TOKEN_OPERATOR = intern('operator')
TOKEN_BLOCK_BEGIN = intern('block_begin')
TOKEN_BLOCK_END = intern('block_end')
TOKEN_VARIABLE_BEGIN = intern('variable_begin')
TOKEN_VARIABLE_END = intern('variable_end')
TOKEN_RAW_BEGIN = intern('raw_begin')
TOKEN_RAW_END = intern('raw_end')
TOKEN_COMMENT_BEGIN = intern('comment_begin')
TOKEN_COMMENT_END = intern('comment_end')
TOKEN_COMMENT = intern('comment')
TOKEN_LINESTATEMENT_BEGIN = intern('linestatement_begin')
TOKEN_LINESTATEMENT_END = intern('linestatement_end')
TOKEN_LINECOMMENT_BEGIN = intern('linecomment_begin')
TOKEN_LINECOMMENT_END = intern('linecomment_end')
TOKEN_LINECOMMENT = intern('linecomment')
TOKEN_DATA = intern('data')
TOKEN_INITIAL = intern('initial')
TOKEN_EOF = intern('eof')

# bind operators to token types
operators = {
    '+':            TOKEN_ADD,
    '-':            TOKEN_SUB,
    '/':            TOKEN_DIV,
    '//':           TOKEN_FLOORDIV,
    '*':            TOKEN_MUL,
    '%':            TOKEN_MOD,
    '**':           TOKEN_POW,
    '~':            TOKEN_TILDE,
    '[':            TOKEN_LBRACKET,
    ']':            TOKEN_RBRACKET,
    '(':            TOKEN_LPAREN,
    ')':            TOKEN_RPAREN,
    '{':            TOKEN_LBRACE,
    '}':            TOKEN_RBRACE,
    '==':           TOKEN_EQ,
    '!=':           TOKEN_NE,
    '>':            TOKEN_GT,
    '>=':           TOKEN_GTEQ,
    '<':            TOKEN_LT,
    '<=':           TOKEN_LTEQ,
    '=':            TOKEN_ASSIGN,
    '.':            TOKEN_DOT,
    ':':            TOKEN_COLON,
    '|':            TOKEN_PIPE,
    ',':            TOKEN_COMMA,
    ';':            TOKEN_SEMICOLON
}

reverse_operators = dict([(v, k) for k, v in operators.iteritems()])
assert len(operators) == len(reverse_operators), 'operators dropped'
operator_re = re.compile('(%s)' % '|'.join(re.escape(x) for x in
                         sorted(operators, key=lambda x: -len(x))))
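
# The descending length sort above matters: in a regex alternation the first
# branch that matches wins, so two-character operators must appear before
# their one-character prefixes.  A quick check (illustrative, not part of
# the module):
#
#     >>> operator_re.match('**').group()
#     '**'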

ignored_tokens = frozenset([TOKEN_COMMENT_BEGIN, TOKEN_COMMENT,
                            TOKEN_COMMENT_END, TOKEN_WHITESPACE,
                            TOKEN_LINECOMMENT_BEGIN, TOKEN_LINECOMMENT_END,
                            TOKEN_LINECOMMENT])
ignore_if_empty = frozenset([TOKEN_WHITESPACE, TOKEN_DATA,
                             TOKEN_COMMENT, TOKEN_LINECOMMENT])


def _describe_token_type(token_type):
    if token_type in reverse_operators:
        return reverse_operators[token_type]
    return {
        TOKEN_COMMENT_BEGIN:        'begin of comment',
        TOKEN_COMMENT_END:          'end of comment',
        TOKEN_COMMENT:              'comment',
        TOKEN_LINECOMMENT:          'comment',
        TOKEN_BLOCK_BEGIN:          'begin of statement block',
        TOKEN_BLOCK_END:            'end of statement block',
        TOKEN_VARIABLE_BEGIN:       'begin of print statement',
        TOKEN_VARIABLE_END:         'end of print statement',
        TOKEN_LINESTATEMENT_BEGIN:  'begin of line statement',
        TOKEN_LINESTATEMENT_END:    'end of line statement',
        TOKEN_DATA:                 'template data / text',
        TOKEN_EOF:                  'end of template'
    }.get(token_type, token_type)


def describe_token(token):
    """Returns a description of the token."""
    if token.type == 'name':
        return token.value
    return _describe_token_type(token.type)


def describe_token_expr(expr):
    """Like `describe_token` but for token expressions."""
    if ':' in expr:
        type, value = expr.split(':', 1)
        if type == 'name':
            return value
    else:
        type = expr
    return _describe_token_type(type)
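
# Token expressions, as used by ``describe_token_expr`` above and
# ``Token.test`` below, are either a bare token type or a ``'type:value'``
# pair (illustrative examples, not part of the module):
#
#     >>> describe_token_expr('block_end')
#     'end of statement block'
#     >>> describe_token_expr('name:endfor')
#     'endfor'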


def count_newlines(value):
    """Count the number of newline characters in the string.  This is
    useful for extensions that filter a stream.
    """
    return len(newline_re.findall(value))
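
# For instance (illustrative), all three newline conventions are counted:
#
#     >>> count_newlines(u'foo\r\nbar\rbaz\n')
#     3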


def compile_rules(environment):
    """Compiles the delimiter rules from the environment into a list of
    ``(name, pattern)`` pairs, longest delimiters first."""
    e = re.escape
    rules = [
        (len(environment.comment_start_string), 'comment',
         e(environment.comment_start_string)),
        (len(environment.block_start_string), 'block',
         e(environment.block_start_string)),
        (len(environment.variable_start_string), 'variable',
         e(environment.variable_start_string))
    ]

    if environment.line_statement_prefix is not None:
        rules.append((len(environment.line_statement_prefix), 'linestatement',
                      r'^\s*' + e(environment.line_statement_prefix)))
    if environment.line_comment_prefix is not None:
        rules.append((len(environment.line_comment_prefix), 'linecomment',
                      r'(?:^|(?<=\S))[^\S\r\n]*' +
                      e(environment.line_comment_prefix)))

    return [x[1:] for x in sorted(rules, reverse=True)]
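
# The length-based sort is what keeps longer delimiters from being shadowed
# by shorter ones.  Roughly (illustrative, using the default delimiters plus
# a one-character line statement prefix):
#
#     >>> from jinja2 import Environment
#     >>> compile_rules(Environment(line_statement_prefix='#'))[-1]
#     ('linestatement', '^\\s*\\#')
#
# i.e. the one-character ``#`` rule sorts after the two-character tag
# delimiters and is only tried once those have failed to match.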


class Failure(object):
    """Class that raises a `TemplateSyntaxError` if called.
    Used by the `Lexer` to specify known errors.
    """

    def __init__(self, message, cls=TemplateSyntaxError):
        self.message = message
        self.error_class = cls

    def __call__(self, lineno, filename):
        raise self.error_class(self.message, lineno, filename)


class Token(tuple):
    """A token, stored as a ``(lineno, type, value)`` tuple."""
    __slots__ = ()
    lineno, type, value = (property(itemgetter(x)) for x in range(3))

    def __new__(cls, lineno, type, value):
        return tuple.__new__(cls, (lineno, intern(str(type)), value))

    def __str__(self):
        if self.type in reverse_operators:
            return reverse_operators[self.type]
        elif self.type == 'name':
            return self.value
        return self.type

    def test(self, expr):
        """Test a token against a token expression.  This can either be a
        token type or ``'token_type:token_value'``.  This can only test
        against string values and types.
        """
        # here we do a regular string equality check as test_any is usually
        # passed an iterable of non-interned strings.
        if self.type == expr:
            return True
        elif ':' in expr:
            return expr.split(':', 1) == [self.type, self.value]
        return False

    def test_any(self, *iterable):
        """Test against multiple token expressions."""
        for expr in iterable:
            if self.test(expr):
                return True
        return False

    def __repr__(self):
        return 'Token(%r, %r, %r)' % (
            self.lineno,
            self.type,
            self.value
        )


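# A quick illustration of token tests (not part of the module itself):
#
#     >>> tok = Token(1, 'name', 'endfor')
#     >>> tok.test('name'), tok.test('name:endfor'), tok.test('string')
#     (True, True, False)
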
class TokenStreamIterator(object):
    """The iterator for token streams.  Iterate over the stream
    until the eof token is reached.
    """

    def __init__(self, stream):
        self.stream = stream

    def __iter__(self):
        return self

    def next(self):
        token = self.stream.current
        if token.type is TOKEN_EOF:
            self.stream.close()
            raise StopIteration()
        next(self.stream)
        return token


class TokenStream(object):
    """A token stream is an iterable that yields :class:`Token`\s.  The
    parser however does not iterate over it but calls :meth:`next` to go
    one token ahead.  The current active token is stored as :attr:`current`.
    """

    def __init__(self, generator, name, filename):
        self._next = iter(generator).next
        self._pushed = deque()
        self.name = name
        self.filename = filename
        self.closed = False
        self.current = Token(1, TOKEN_INITIAL, '')
        next(self)

    def __iter__(self):
        return TokenStreamIterator(self)

    def __nonzero__(self):
        return bool(self._pushed) or self.current.type is not TOKEN_EOF

    eos = property(lambda x: not x, doc="Are we at the end of the stream?")

    def push(self, token):
        """Push a token back to the stream."""
        self._pushed.append(token)

    def look(self):
        """Look at the next token."""
        old_token = next(self)
        result = self.current
        self.push(result)
        self.current = old_token
        return result

    def skip(self, n=1):
        """Skip n tokens ahead."""
        for x in xrange(n):
            next(self)

    def next_if(self, expr):
        """Perform the token test and return the token if it matched.
        Otherwise the return value is `None`.
        """
        if self.current.test(expr):
            return next(self)

    def skip_if(self, expr):
        """Like :meth:`next_if` but only returns `True` or `False`."""
        return self.next_if(expr) is not None

    def next(self):
        """Go one token ahead and return the old one."""
        rv = self.current
        if self._pushed:
            self.current = self._pushed.popleft()
        elif self.current.type is not TOKEN_EOF:
            try:
                self.current = self._next()
            except StopIteration:
                self.close()
        return rv

    def close(self):
        """Close the stream."""
        self.current = Token(self.current.lineno, TOKEN_EOF, '')
        self._next = None
        self.closed = True

    def expect(self, expr):
        """Expect a given token type and return it.  This accepts the same
        argument as :meth:`jinja2.lexer.Token.test`.
        """
        if not self.current.test(expr):
            expr = describe_token_expr(expr)
            if self.current.type is TOKEN_EOF:
                raise TemplateSyntaxError('unexpected end of template, '
                                          'expected %r.' % expr,
                                          self.current.lineno,
                                          self.name, self.filename)
            raise TemplateSyntaxError("expected token %r, got %r" %
                                      (expr, describe_token(self.current)),
                                      self.current.lineno,
                                      self.name, self.filename)
        try:
            return self.current
        finally:
            next(self)


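# Sketch of how a parser drives a stream (illustrative; assumes the default
# delimiters of a plain ``jinja2.Environment`` and the ``Lexer`` defined
# further below):
#
#     >>> from jinja2 import Environment
#     >>> stream = Lexer(Environment()).tokenize(u'{{ foo }}')
#     >>> stream.expect('variable_begin').type
#     'variable_begin'
#     >>> stream.current.value              # peek at the name token
#     'foo'
#     >>> stream.skip_if('name:foo')        # consume it
#     True
#     >>> stream.expect('variable_end').type
#     'variable_end'
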
def get_lexer(environment):
    """Return a lexer which is probably cached."""
    key = (environment.block_start_string,
           environment.block_end_string,
           environment.variable_start_string,
           environment.variable_end_string,
           environment.comment_start_string,
           environment.comment_end_string,
           environment.line_statement_prefix,
           environment.line_comment_prefix,
           environment.trim_blocks,
           environment.newline_sequence)
    lexer = _lexer_cache.get(key)
    if lexer is None:
        lexer = Lexer(environment)
        _lexer_cache[key] = lexer
    return lexer


class Lexer(object):
    """Class that implements a lexer for a given environment.  It is
    created automatically by the environment class, so you usually don't
    have to do that yourself.

    Note that the lexer is not automatically bound to an environment.
    Multiple environments can share the same lexer.
    """

    def __init__(self, environment):
        # shortcuts
        c = lambda x: re.compile(x, re.M | re.S)
        e = re.escape

        # lexing rules for tags
        tag_rules = [
            (whitespace_re, TOKEN_WHITESPACE, None),
            (float_re, TOKEN_FLOAT, None),
            (integer_re, TOKEN_INTEGER, None),
            (name_re, TOKEN_NAME, None),
            (string_re, TOKEN_STRING, None),
            (operator_re, TOKEN_OPERATOR, None)
        ]

        # assemble the root lexing rule. because "|" in a regex is not
        # greedy (the first alternative that matches wins) we have to sort
        # by length so that the lexer keeps working as expected when we
        # have parsing rules like <% for blocks and <%= for variables
        # (if someone wants ASP-like syntax). variables are just part of
        # the rules if variable processing is required.
        root_tag_rules = compile_rules(environment)

        # block suffix if trimming is enabled
        block_suffix_re = environment.trim_blocks and '\\n?' or ''

        self.newline_sequence = environment.newline_sequence

        # global lexing rules
        self.rules = {
            'root': [
                # directives
                (c('(.*?)(?:%s)' % '|'.join(
                    [r'(?P<raw_begin>(?:\s*%s\-|%s)\s*raw\s*(?:\-%s\s*|%s))' % (
                        e(environment.block_start_string),
                        e(environment.block_start_string),
                        e(environment.block_end_string),
                        e(environment.block_end_string)
                    )] + [
                        r'(?P<%s_begin>\s*%s\-|%s)' % (n, r, r)
                        for n, r in root_tag_rules
                    ])), (TOKEN_DATA, '#bygroup'), '#bygroup'),
                # data
                (c('.+'), TOKEN_DATA, None)
            ],
            # comments
            TOKEN_COMMENT_BEGIN: [
                (c(r'(.*?)((?:\-%s\s*|%s)%s)' % (
                    e(environment.comment_end_string),
                    e(environment.comment_end_string),
                    block_suffix_re
                )), (TOKEN_COMMENT, TOKEN_COMMENT_END), '#pop'),
                (c('(.)'), (Failure('Missing end of comment tag'),), None)
            ],
            # blocks
            TOKEN_BLOCK_BEGIN: [
                (c('(?:\-%s\s*|%s)%s' % (
                    e(environment.block_end_string),
                    e(environment.block_end_string),
                    block_suffix_re
                )), TOKEN_BLOCK_END, '#pop'),
            ] + tag_rules,
            # variables
            TOKEN_VARIABLE_BEGIN: [
                (c('\-%s\s*|%s' % (
                    e(environment.variable_end_string),
                    e(environment.variable_end_string)
                )), TOKEN_VARIABLE_END, '#pop')
            ] + tag_rules,
            # raw block
            TOKEN_RAW_BEGIN: [
                (c('(.*?)((?:\s*%s\-|%s)\s*endraw\s*(?:\-%s\s*|%s%s))' % (
                    e(environment.block_start_string),
                    e(environment.block_start_string),
                    e(environment.block_end_string),
                    e(environment.block_end_string),
                    block_suffix_re
                )), (TOKEN_DATA, TOKEN_RAW_END), '#pop'),
                (c('(.)'), (Failure('Missing end of raw directive'),), None)
            ],
            # line statements
            TOKEN_LINESTATEMENT_BEGIN: [
                (c(r'\s*(\n|$)'), TOKEN_LINESTATEMENT_END, '#pop')
            ] + tag_rules,
            # line comments
            TOKEN_LINECOMMENT_BEGIN: [
                (c(r'(.*?)()(?=\n|$)'), (TOKEN_LINECOMMENT,
                 TOKEN_LINECOMMENT_END), '#pop')
            ]
        }
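
        # Each entry above is a ``(regex, tokens, new_state)`` triple
        # (an illustrative summary, not additional behaviour):
        #
        #   * ``tokens`` is either a single token type, a tuple of types
        #     matching the regex groups, or a tuple holding a ``Failure``
        #     that is raised on match.
        #   * ``new_state`` pushes the named state, ``'#pop'`` returns to
        #     the previous state, and ``'#bygroup'`` picks the state from
        #     the name of the regex group that matched.
        #
        # With the default delimiters, ``'{% if x %}'`` therefore first hits
        # the root rule, which yields a ``block_begin`` token and pushes the
        # ``block_begin`` state; its rules then tokenize ``if``, ``x`` and
        # the closing ``%}`` before popping back to ``root``.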

    def _normalize_newlines(self, value):
        """Called for strings and template data to normalize newlines to
        the configured newline sequence."""
        return newline_re.sub(self.newline_sequence, value)

    def tokenize(self, source, name=None, filename=None, state=None):
        """Calls :meth:`tokeniter` and :meth:`wrap` and returns the result
        as a :class:`TokenStream`.
        """
        stream = self.tokeniter(source, name, filename, state)
        return TokenStream(self.wrap(stream, name, filename), name, filename)

    def wrap(self, stream, name=None, filename=None):
        """This is called with the stream as returned by `tokenize` and wraps
        every token in a :class:`Token` and converts the value.
        """
        for lineno, token, value in stream:
            if token in ignored_tokens:
                continue
            elif token == 'linestatement_begin':
                token = 'block_begin'
            elif token == 'linestatement_end':
                token = 'block_end'
            # we are not interested in those tokens in the parser
            elif token in ('raw_begin', 'raw_end'):
                continue
            elif token == 'data':
                value = self._normalize_newlines(value)
            elif token == 'keyword':
                token = value
            elif token == 'name':
                value = str(value)
            elif token == 'string':
                # try to unescape string
                try:
                    value = self._normalize_newlines(value[1:-1]) \
                        .encode('ascii', 'backslashreplace') \
                        .decode('unicode-escape')
                except Exception, e:
                    msg = str(e).split(':')[-1].strip()
                    raise TemplateSyntaxError(msg, lineno, name, filename)
                # if we can express it as a bytestring (ascii only)
                # we do that for the support of semi-broken APIs
                # such as datetime.datetime.strftime.  On Python 3 this
                # call becomes a noop thanks to 2to3
                try:
                    value = str(value)
                except UnicodeError:
                    pass
            elif token == 'integer':
                value = int(value)
            elif token == 'float':
                value = float(value)
            elif token == 'operator':
                token = operators[value]
            yield Token(lineno, token, value)
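
        # A concrete illustration of the string branch above (not extra
        # behaviour): the template literal "foo\nbar" reaches this method as
        # the raw source text '"foo\\nbar"'; the quotes are stripped and the
        # backslash escape decoded, so the parser receives a real two-line
        # string value.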

    def tokeniter(self, source, name, filename=None, state=None):
        """This method tokenizes the text and returns the tokens in a
        generator.  Use this method if you just want to tokenize a template.
        """
        source = '\n'.join(unicode(source).splitlines())
        pos = 0
        lineno = 1
        stack = ['root']
        if state is not None and state != 'root':
            assert state in ('variable', 'block'), 'invalid state'
            stack.append(state + '_begin')
        else:
            state = 'root'
        statetokens = self.rules[stack[-1]]
        source_length = len(source)

        balancing_stack = []

        while 1:
            # tokenizer loop
            for regex, tokens, new_state in statetokens:
                m = regex.match(source, pos)
                # if no match we try again with the next rule
                if m is None:
                    continue

                # we only match blocks and variables if braces / parentheses
                # are balanced. continue parsing with the lower rule which
                # is the operator rule. do this only if the end tags look
                # like operators
                if balancing_stack and \
                   tokens in ('variable_end', 'block_end',
                              'linestatement_end'):
                    continue

                # tuples support more options
                if isinstance(tokens, tuple):
                    for idx, token in enumerate(tokens):
                        # failure group
                        if token.__class__ is Failure:
                            raise token(lineno, filename)
                        # bygroup is a bit more complex, in that case we
                        # yield the value of the first named group that
                        # matched for the current token
                        elif token == '#bygroup':
                            for key, value in m.groupdict().iteritems():
                                if value is not None:
                                    yield lineno, key, value
                                    lineno += value.count('\n')
                                    break
                            else:
                                raise RuntimeError('%r wanted to resolve '
                                                   'the token dynamically'
                                                   ' but no group matched'
                                                   % regex)
                        # normal group
                        else:
                            data = m.group(idx + 1)
                            if data or token not in ignore_if_empty:
                                yield lineno, token, data
                            lineno += data.count('\n')

                # plain string token types are just yielded as-is.
                else:
                    data = m.group()
                    # update brace/parentheses balance
                    if tokens == 'operator':
                        if data == '{':
                            balancing_stack.append('}')
                        elif data == '(':
                            balancing_stack.append(')')
                        elif data == '[':
                            balancing_stack.append(']')
                        elif data in ('}', ')', ']'):
                            if not balancing_stack:
                                raise TemplateSyntaxError('unexpected \'%s\'' %
                                                          data, lineno, name,
                                                          filename)
                            expected_op = balancing_stack.pop()
                            if expected_op != data:
                                raise TemplateSyntaxError('unexpected \'%s\', '
                                                          'expected \'%s\'' %
                                                          (data, expected_op),
                                                          lineno, name,
                                                          filename)
                    # yield items
                    if data or tokens not in ignore_if_empty:
                        yield lineno, tokens, data
                    lineno += data.count('\n')

                # fetch the new position into a new variable so that we can
                # check if there is an internal parsing error which would
                # result in an infinite loop
                pos2 = m.end()

                # handle state changes
                if new_state is not None:
                    # remove the uppermost state
                    if new_state == '#pop':
                        stack.pop()
                    # resolve the new state by group checking
                    elif new_state == '#bygroup':
                        for key, value in m.groupdict().iteritems():
                            if value is not None:
                                stack.append(key)
                                break
                        else:
                            raise RuntimeError('%r wanted to resolve the '
                                               'new state dynamically but'
                                               ' no group matched' %
                                               regex)
                    # direct state name given
                    else:
                        stack.append(new_state)
                    statetokens = self.rules[stack[-1]]
                # we are still at the same position and no stack change.
                # this means a loop without a break condition; avoid that
                # and raise an error
                elif pos2 == pos:
                    raise RuntimeError('%r yielded empty string without '
                                       'stack change' % regex)
                # publish the new position and start again
                pos = pos2
                break
            # if the loop terminated without a break we haven't found a
            # single match; either we are at the end of the file or we
            # have a problem
            else:
                # end of text
                if pos >= source_length:
                    return
                # something went wrong
                raise TemplateSyntaxError('unexpected char %r at %d' %
                                          (source[pos], pos), lineno,
                                          name, filename)
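

# End-to-end sketch (illustrative; assumes the default delimiters of a plain
# ``jinja2.Environment``):
#
#     >>> from jinja2 import Environment
#     >>> lexer = get_lexer(Environment())
#     >>> for tok in lexer.tokenize(u'Hello {{ name }}!'):
#     ...     print tok.lineno, tok.type, repr(tok.value)
#     1 data u'Hello '
#     1 variable_begin u'{{'
#     1 name 'name'
#     1 variable_end u'}}'
#     1 data u'!'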
    682