# -*- coding: utf-8 -*-
"""
    jinja2.lexer
    ~~~~~~~~~~~~

    This module implements a Jinja / Python combination lexer. The
    `Lexer` class provided by this module is used to do some preprocessing
    for Jinja.

    On the one hand it filters out invalid operators like the bitshift
    operators we don't allow in templates. On the other hand it separates
    template code and python code in expressions.

    :copyright: (c) 2010 by the Jinja Team.
    :license: BSD, see LICENSE for more details.
"""
import re

from operator import itemgetter
from collections import deque
from jinja2.exceptions import TemplateSyntaxError
from jinja2.utils import LRUCache
from jinja2._compat import next, iteritems, implements_iterator, text_type, \
     intern


# cache for the lexers. Exists in order to be able to have multiple
# environments with the same lexer
_lexer_cache = LRUCache(50)

# static regular expressions
whitespace_re = re.compile(r'\s+', re.U)
string_re = re.compile(r"('([^'\\]*(?:\\.[^'\\]*)*)'"
                       r'|"([^"\\]*(?:\\.[^"\\]*)*)")', re.S)
integer_re = re.compile(r'\d+')

# we use the unicode identifier rule if this python version is able
# to handle unicode identifiers, otherwise the standard ASCII one.
try:
    compile('föö', '<unknown>', 'eval')
except SyntaxError:
    name_re = re.compile(r'\b[a-zA-Z_][a-zA-Z0-9_]*\b')
else:
    from jinja2 import _stringdefs
    name_re = re.compile(r'[%s][%s]*' % (_stringdefs.xid_start,
                                         _stringdefs.xid_continue))

float_re = re.compile(r'(?<!\.)\d+\.\d+')
newline_re = re.compile(r'(\r\n|\r|\n)')

# intern the tokens and keep references to them
TOKEN_ADD = intern('add')
TOKEN_ASSIGN = intern('assign')
TOKEN_COLON = intern('colon')
TOKEN_COMMA = intern('comma')
TOKEN_DIV = intern('div')
TOKEN_DOT = intern('dot')
TOKEN_EQ = intern('eq')
TOKEN_FLOORDIV = intern('floordiv')
TOKEN_GT = intern('gt')
TOKEN_GTEQ = intern('gteq')
TOKEN_LBRACE = intern('lbrace')
TOKEN_LBRACKET = intern('lbracket')
TOKEN_LPAREN = intern('lparen')
TOKEN_LT = intern('lt')
TOKEN_LTEQ = intern('lteq')
TOKEN_MOD = intern('mod')
TOKEN_MUL = intern('mul')
TOKEN_NE = intern('ne')
TOKEN_PIPE = intern('pipe')
TOKEN_POW = intern('pow')
TOKEN_RBRACE = intern('rbrace')
TOKEN_RBRACKET = intern('rbracket')
TOKEN_RPAREN = intern('rparen')
TOKEN_SEMICOLON = intern('semicolon')
TOKEN_SUB = intern('sub')
TOKEN_TILDE = intern('tilde')
TOKEN_WHITESPACE = intern('whitespace')
TOKEN_FLOAT = intern('float')
TOKEN_INTEGER = intern('integer')
TOKEN_NAME = intern('name')
TOKEN_STRING = intern('string')
TOKEN_OPERATOR = intern('operator')
TOKEN_BLOCK_BEGIN = intern('block_begin')
TOKEN_BLOCK_END = intern('block_end')
TOKEN_VARIABLE_BEGIN = intern('variable_begin')
TOKEN_VARIABLE_END = intern('variable_end')
TOKEN_RAW_BEGIN = intern('raw_begin')
TOKEN_RAW_END = intern('raw_end')
TOKEN_COMMENT_BEGIN = intern('comment_begin')
TOKEN_COMMENT_END = intern('comment_end')
TOKEN_COMMENT = intern('comment')
TOKEN_LINESTATEMENT_BEGIN = intern('linestatement_begin')
TOKEN_LINESTATEMENT_END = intern('linestatement_end')
TOKEN_LINECOMMENT_BEGIN = intern('linecomment_begin')
TOKEN_LINECOMMENT_END = intern('linecomment_end')
TOKEN_LINECOMMENT = intern('linecomment')
TOKEN_DATA = intern('data')
TOKEN_INITIAL = intern('initial')
TOKEN_EOF = intern('eof')

# bind operators to token types
operators = {
    '+':            TOKEN_ADD,
    '-':            TOKEN_SUB,
    '/':            TOKEN_DIV,
    '//':           TOKEN_FLOORDIV,
    '*':            TOKEN_MUL,
    '%':            TOKEN_MOD,
    '**':           TOKEN_POW,
    '~':            TOKEN_TILDE,
    '[':            TOKEN_LBRACKET,
    ']':            TOKEN_RBRACKET,
    '(':            TOKEN_LPAREN,
    ')':            TOKEN_RPAREN,
    '{':            TOKEN_LBRACE,
    '}':            TOKEN_RBRACE,
    '==':           TOKEN_EQ,
    '!=':           TOKEN_NE,
    '>':            TOKEN_GT,
    '>=':           TOKEN_GTEQ,
    '<':            TOKEN_LT,
    '<=':           TOKEN_LTEQ,
    '=':            TOKEN_ASSIGN,
    '.':            TOKEN_DOT,
    ':':            TOKEN_COLON,
    '|':            TOKEN_PIPE,
    ',':            TOKEN_COMMA,
    ';':            TOKEN_SEMICOLON
}

reverse_operators = dict([(v, k) for k, v in iteritems(operators)])
assert len(operators) == len(reverse_operators), 'operators dropped'
operator_re = re.compile('(%s)' % '|'.join(re.escape(x) for x in
                         sorted(operators, key=lambda x: -len(x))))

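# The alternation in ``operator_re`` is tried in order, so the operators are
# sorted longest-first; otherwise e.g. '**' or '//' would be tokenized as two
# single-character operators.  For illustration, the generated pattern starts
# roughly like::
#
#     (\*\*|//|==|!=|>=|<=|\+|\-|...)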
ignored_tokens = frozenset([TOKEN_COMMENT_BEGIN, TOKEN_COMMENT,
                            TOKEN_COMMENT_END, TOKEN_WHITESPACE,
                            TOKEN_LINECOMMENT_BEGIN, TOKEN_LINECOMMENT_END,
                            TOKEN_LINECOMMENT])
ignore_if_empty = frozenset([TOKEN_WHITESPACE, TOKEN_DATA,
                             TOKEN_COMMENT, TOKEN_LINECOMMENT])


def _describe_token_type(token_type):
    if token_type in reverse_operators:
        return reverse_operators[token_type]
    return {
        TOKEN_COMMENT_BEGIN:        'begin of comment',
        TOKEN_COMMENT_END:          'end of comment',
        TOKEN_COMMENT:              'comment',
        TOKEN_LINECOMMENT:          'comment',
        TOKEN_BLOCK_BEGIN:          'begin of statement block',
        TOKEN_BLOCK_END:            'end of statement block',
        TOKEN_VARIABLE_BEGIN:       'begin of print statement',
        TOKEN_VARIABLE_END:         'end of print statement',
        TOKEN_LINESTATEMENT_BEGIN:  'begin of line statement',
        TOKEN_LINESTATEMENT_END:    'end of line statement',
        TOKEN_DATA:                 'template data / text',
        TOKEN_EOF:                  'end of template'
    }.get(token_type, token_type)


def describe_token(token):
    """Returns a description of the token."""
    if token.type == 'name':
        return token.value
    return _describe_token_type(token.type)


def describe_token_expr(expr):
    """Like `describe_token` but for token expressions."""
    if ':' in expr:
        type, value = expr.split(':', 1)
        if type == 'name':
            return value
    else:
        type = expr
    return _describe_token_type(type)

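# For example, ``describe_token_expr('name:endfor')`` returns ``'endfor'``,
# while ``describe_token_expr('block_end')`` returns
# 'end of statement block'; both are used to phrase syntax error messages.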

def count_newlines(value):
    """Count the number of newline characters in the string.  This is
    useful for extensions that filter a stream.
    """
    return len(newline_re.findall(value))

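# For example, ``count_newlines(u'foo\r\nbar\nbaz')`` returns 2: the
# ``newline_re`` pattern above treats '\r\n', '\r' and '\n' each as a
# single newline.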

def compile_rules(environment):
    """Compiles all the rules from the environment into a list of rules."""
    e = re.escape
    rules = [
        (len(environment.comment_start_string), 'comment',
         e(environment.comment_start_string)),
        (len(environment.block_start_string), 'block',
         e(environment.block_start_string)),
        (len(environment.variable_start_string), 'variable',
         e(environment.variable_start_string))
    ]

    if environment.line_statement_prefix is not None:
        rules.append((len(environment.line_statement_prefix), 'linestatement',
                      r'^[ \t\v]*' + e(environment.line_statement_prefix)))
    if environment.line_comment_prefix is not None:
        rules.append((len(environment.line_comment_prefix), 'linecomment',
                      r'(?:^|(?<=\S))[^\S\r\n]*' +
                      e(environment.line_comment_prefix)))

    return [x[1:] for x in sorted(rules, reverse=True)]

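# With the default Jinja delimiters this returns, roughly, the pairs
# ('variable', escaped '{{'), ('comment', escaped '{#') and
# ('block', escaped '{%').  Longer start strings sort first, so a more
# specific delimiter wins when one delimiter is a prefix of another.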


class Failure(object):
    """Class that raises a `TemplateSyntaxError` if called.
    Used by the `Lexer` to specify known errors.
    """

    def __init__(self, message, cls=TemplateSyntaxError):
        self.message = message
        self.error_class = cls

    def __call__(self, lineno, filename):
        raise self.error_class(self.message, lineno, filename)


class Token(tuple):
    """Token class."""
    __slots__ = ()
    lineno, type, value = (property(itemgetter(x)) for x in range(3))

    def __new__(cls, lineno, type, value):
        return tuple.__new__(cls, (lineno, intern(str(type)), value))

    def __str__(self):
        if self.type in reverse_operators:
            return reverse_operators[self.type]
        elif self.type == 'name':
            return self.value
        return self.type

    def test(self, expr):
        """Test a token against a token expression.  This can either be a
        token type or ``'token_type:token_value'``.  This can only test
        against string values and types.
        """
        # here we do a regular string equality check as test_any is usually
        # passed an iterable of non-interned strings.
        if self.type == expr:
            return True
        elif ':' in expr:
            return expr.split(':', 1) == [self.type, self.value]
        return False

    def test_any(self, *iterable):
        """Test against multiple token expressions."""
        for expr in iterable:
            if self.test(expr):
                return True
        return False

    def __repr__(self):
        return 'Token(%r, %r, %r)' % (
            self.lineno,
            self.type,
            self.value
        )

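# A small illustration of the token expression syntax used by ``test``::
#
#     tok = Token(1, 'name', 'endfor')
#     tok.test('name')                          # True -- type only
#     tok.test('name:endfor')                   # True -- type and value
#     tok.test_any('integer', 'name:endfor')    # True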

@implements_iterator
class TokenStreamIterator(object):
    """The iterator for tokenstreams.  Iterate over the stream
    until the eof token is reached.
    """

    def __init__(self, stream):
        self.stream = stream

    def __iter__(self):
        return self

    def __next__(self):
        token = self.stream.current
        if token.type is TOKEN_EOF:
            self.stream.close()
            raise StopIteration()
        next(self.stream)
        return token


@implements_iterator
class TokenStream(object):
    """A token stream is an iterable that yields :class:`Token`\s.  The
    parser however does not iterate over it but calls :meth:`next` to go
    one token ahead.  The current active token is stored as :attr:`current`.
    """

    def __init__(self, generator, name, filename):
        self._iter = iter(generator)
        self._pushed = deque()
        self.name = name
        self.filename = filename
        self.closed = False
        self.current = Token(1, TOKEN_INITIAL, '')
        next(self)

    def __iter__(self):
        return TokenStreamIterator(self)

    def __bool__(self):
        return bool(self._pushed) or self.current.type is not TOKEN_EOF
    __nonzero__ = __bool__  # py2

    eos = property(lambda x: not x, doc="Are we at the end of the stream?")

    def push(self, token):
        """Push a token back to the stream."""
        self._pushed.append(token)

    def look(self):
        """Look at the next token."""
        old_token = next(self)
        result = self.current
        self.push(result)
        self.current = old_token
        return result

    def skip(self, n=1):
        """Go n tokens ahead."""
        for x in range(n):
            next(self)

    def next_if(self, expr):
        """Perform the token test and return the token if it matched.
        Otherwise the return value is `None`.
        """
        if self.current.test(expr):
            return next(self)

    def skip_if(self, expr):
        """Like :meth:`next_if` but only returns `True` or `False`."""
        return self.next_if(expr) is not None

    def __next__(self):
        """Go one token ahead and return the old one"""
        rv = self.current
        if self._pushed:
            self.current = self._pushed.popleft()
        elif self.current.type is not TOKEN_EOF:
            try:
                self.current = next(self._iter)
            except StopIteration:
                self.close()
        return rv

    def close(self):
        """Close the stream."""
        self.current = Token(self.current.lineno, TOKEN_EOF, '')
        self._iter = None
        self.closed = True

    def expect(self, expr):
        """Expect a given token type and return it.  This accepts the same
        argument as :meth:`jinja2.lexer.Token.test`.
        """
        if not self.current.test(expr):
            expr = describe_token_expr(expr)
            if self.current.type is TOKEN_EOF:
                raise TemplateSyntaxError('unexpected end of template, '
                                          'expected %r.' % expr,
                                          self.current.lineno,
                                          self.name, self.filename)
            raise TemplateSyntaxError("expected token %r, got %r" %
                                      (expr, describe_token(self.current)),
                                      self.current.lineno,
                                      self.name, self.filename)
        try:
            return self.current
        finally:
            next(self)

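# A sketch of how a consumer typically drives the stream (the real consumer
# is jinja2.parser.Parser)::
#
#     stream = get_lexer(environment).tokenize(source, name)
#     token = stream.expect('block_begin')      # or raise TemplateSyntaxError
#     if stream.skip_if('name:endfor'):
#         ...                                   # matching token was consumed
#     while not stream.eos:
#         next(stream)                          # advance one token at a time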

def get_lexer(environment):
    """Return a lexer which is probably cached."""
    key = (environment.block_start_string,
           environment.block_end_string,
           environment.variable_start_string,
           environment.variable_end_string,
           environment.comment_start_string,
           environment.comment_end_string,
           environment.line_statement_prefix,
           environment.line_comment_prefix,
           environment.trim_blocks,
           environment.lstrip_blocks,
           environment.newline_sequence,
           environment.keep_trailing_newline)
    lexer = _lexer_cache.get(key)
    if lexer is None:
        lexer = Lexer(environment)
        _lexer_cache[key] = lexer
    return lexer

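# The cache key above covers every setting the lexer depends on, so
# environments that differ only in settings the lexer does not care about
# (for example autoescaping) end up sharing a single `Lexer` instance.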


class Lexer(object):
    """Class that implements a lexer for a given environment.  Automatically
    created by the environment class; usually you don't have to do that.

    Note that the lexer is not automatically bound to an environment.
    Multiple environments can share the same lexer.
    """

    def __init__(self, environment):
        # shortcuts
        c = lambda x: re.compile(x, re.M | re.S)
        e = re.escape

        # lexing rules for tags
        tag_rules = [
            (whitespace_re, TOKEN_WHITESPACE, None),
            (float_re, TOKEN_FLOAT, None),
            (integer_re, TOKEN_INTEGER, None),
            (name_re, TOKEN_NAME, None),
            (string_re, TOKEN_STRING, None),
            (operator_re, TOKEN_OPERATOR, None)
        ]

        # assemble the root lexing rule. because "|" is ungreedy
        # we have to sort by length so that the lexer continues working
        # as expected when we have parsing rules like <% for block and
        # <%= for variables. (if someone wants asp like syntax)
        # variables are just part of the rules if variable processing
        # is required.
        root_tag_rules = compile_rules(environment)

        # block suffix if trimming is enabled
        block_suffix_re = environment.trim_blocks and '\\n?' or ''

        # strip leading spaces if lstrip_blocks is enabled
        prefix_re = {}
        if environment.lstrip_blocks:
            # use '{%+' to manually disable lstrip_blocks behavior
            no_lstrip_re = e('+')
            # detect overlap between block and variable or comment strings
            block_diff = c(r'^%s(.*)' % e(environment.block_start_string))
            # make sure we don't mistake a block for a variable or a comment
            m = block_diff.match(environment.comment_start_string)
            no_lstrip_re += m and r'|%s' % e(m.group(1)) or ''
            m = block_diff.match(environment.variable_start_string)
            no_lstrip_re += m and r'|%s' % e(m.group(1)) or ''

            # detect overlap between comment and variable strings
            comment_diff = c(r'^%s(.*)' % e(environment.comment_start_string))
            m = comment_diff.match(environment.variable_start_string)
            no_variable_re = m and r'(?!%s)' % e(m.group(1)) or ''

            lstrip_re = r'^[ \t]*'
            block_prefix_re = r'%s%s(?!%s)|%s\+?' % (
                    lstrip_re,
                    e(environment.block_start_string),
                    no_lstrip_re,
                    e(environment.block_start_string),
                    )
            comment_prefix_re = r'%s%s%s|%s\+?' % (
                    lstrip_re,
                    e(environment.comment_start_string),
                    no_variable_re,
                    e(environment.comment_start_string),
                    )
            prefix_re['block'] = block_prefix_re
            prefix_re['comment'] = comment_prefix_re
        else:
            block_prefix_re = '%s' % e(environment.block_start_string)

        self.newline_sequence = environment.newline_sequence
        self.keep_trailing_newline = environment.keep_trailing_newline

        # global lexing rules
        self.rules = {
            'root': [
                # directives
                (c('(.*?)(?:%s)' % '|'.join(
                    [r'(?P<raw_begin>(?:\s*%s\-|%s)\s*raw\s*(?:\-%s\s*|%s))' % (
                        e(environment.block_start_string),
                        block_prefix_re,
                        e(environment.block_end_string),
                        e(environment.block_end_string)
                    )] + [
                        r'(?P<%s_begin>\s*%s\-|%s)' % (n, r, prefix_re.get(n,r))
                        for n, r in root_tag_rules
                    ])), (TOKEN_DATA, '#bygroup'), '#bygroup'),
                # data
                (c('.+'), TOKEN_DATA, None)
            ],
            # comments
            TOKEN_COMMENT_BEGIN: [
                (c(r'(.*?)((?:\-%s\s*|%s)%s)' % (
                    e(environment.comment_end_string),
                    e(environment.comment_end_string),
                    block_suffix_re
                )), (TOKEN_COMMENT, TOKEN_COMMENT_END), '#pop'),
                (c('(.)'), (Failure('Missing end of comment tag'),), None)
            ],
            # blocks
            TOKEN_BLOCK_BEGIN: [
                (c('(?:\-%s\s*|%s)%s' % (
                    e(environment.block_end_string),
                    e(environment.block_end_string),
                    block_suffix_re
                )), TOKEN_BLOCK_END, '#pop'),
            ] + tag_rules,
            # variables
            TOKEN_VARIABLE_BEGIN: [
                (c('\-%s\s*|%s' % (
                    e(environment.variable_end_string),
                    e(environment.variable_end_string)
                )), TOKEN_VARIABLE_END, '#pop')
            ] + tag_rules,
            # raw block
            TOKEN_RAW_BEGIN: [
                (c('(.*?)((?:\s*%s\-|%s)\s*endraw\s*(?:\-%s\s*|%s%s))' % (
                    e(environment.block_start_string),
                    block_prefix_re,
                    e(environment.block_end_string),
                    e(environment.block_end_string),
                    block_suffix_re
                )), (TOKEN_DATA, TOKEN_RAW_END), '#pop'),
                (c('(.)'), (Failure('Missing end of raw directive'),), None)
            ],
            # line statements
            TOKEN_LINESTATEMENT_BEGIN: [
                (c(r'\s*(\n|$)'), TOKEN_LINESTATEMENT_END, '#pop')
            ] + tag_rules,
            # line comments
            TOKEN_LINECOMMENT_BEGIN: [
                (c(r'(.*?)()(?=\n|$)'), (TOKEN_LINECOMMENT,
                 TOKEN_LINECOMMENT_END), '#pop')
            ]
        }

    def _normalize_newlines(self, value):
        """Called for strings and template data to normalize it to unicode."""
        return newline_re.sub(self.newline_sequence, value)

    def tokenize(self, source, name=None, filename=None, state=None):
        """Calls tokeniter + wrap and wraps it in a token stream.
        """
        stream = self.tokeniter(source, name, filename, state)
        return TokenStream(self.wrap(stream, name, filename), name, filename)

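    # For example (a sketch, not executed at import time), tokenizing the
    # template ``u"Hello {{ name }}!"`` with the default delimiters yields
    # roughly this stream of tokens::
    #
    #     Token(1, 'data', u'Hello ')
    #     Token(1, 'variable_begin', u'{{')
    #     Token(1, 'name', 'name')
    #     Token(1, 'variable_end', u'}}')
    #     Token(1, 'data', u'!')
    #
    # with the stream's ``current`` token ending up as ``eof``.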
    def wrap(self, stream, name=None, filename=None):
        """This is called with the stream as returned by `tokeniter` and wraps
        every token in a :class:`Token` and converts the value.
        """
        for lineno, token, value in stream:
            if token in ignored_tokens:
                continue
            elif token == 'linestatement_begin':
                token = 'block_begin'
            elif token == 'linestatement_end':
                token = 'block_end'
            # we are not interested in those tokens in the parser
            elif token in ('raw_begin', 'raw_end'):
                continue
            elif token == 'data':
                value = self._normalize_newlines(value)
            elif token == 'keyword':
                token = value
            elif token == 'name':
                value = str(value)
            elif token == 'string':
                # try to unescape string
                try:
                    value = self._normalize_newlines(value[1:-1]) \
                        .encode('ascii', 'backslashreplace') \
                        .decode('unicode-escape')
                except Exception as e:
                    msg = str(e).split(':')[-1].strip()
                    raise TemplateSyntaxError(msg, lineno, name, filename)
                # if we can express it as bytestring (ascii only)
                # we do that for support of semi broken APIs
                # as datetime.datetime.strftime.  On python 3 this
                # call becomes a noop thanks to 2to3
                try:
                    value = str(value)
                except UnicodeError:
                    pass
            elif token == 'integer':
                value = int(value)
            elif token == 'float':
                value = float(value)
            elif token == 'operator':
                token = operators[value]
            yield Token(lineno, token, value)

    def tokeniter(self, source, name, filename=None, state=None):
        """This method tokenizes the text and returns the tokens in a
        generator.  Use this method if you just want to tokenize a template.
        """
        source = text_type(source)
        lines = source.splitlines()
        if self.keep_trailing_newline and source:
            for newline in ('\r\n', '\r', '\n'):
                if source.endswith(newline):
                    lines.append('')
                    break
        source = '\n'.join(lines)
        pos = 0
        lineno = 1
        stack = ['root']
        if state is not None and state != 'root':
            assert state in ('variable', 'block'), 'invalid state'
            stack.append(state + '_begin')
        else:
            state = 'root'
        statetokens = self.rules[stack[-1]]
        source_length = len(source)

        balancing_stack = []

        while 1:
            # tokenizer loop
            for regex, tokens, new_state in statetokens:
                m = regex.match(source, pos)
                # if no match we try again with the next rule
                if m is None:
                    continue

                # we only match blocks and variables if braces / parentheses
                # are balanced. continue parsing with the lower rule which
                # is the operator rule. do this only if the end tags look
                # like operators
                if balancing_stack and \
                   tokens in ('variable_end', 'block_end',
                              'linestatement_end'):
                    continue

                # tuples support more options
                if isinstance(tokens, tuple):
                    for idx, token in enumerate(tokens):
                        # failure group
                        if token.__class__ is Failure:
                            raise token(lineno, filename)
                        # bygroup is a bit more complex, in that case we
                        # yield for the current token the first named
                        # group that matched
                        elif token == '#bygroup':
                            for key, value in iteritems(m.groupdict()):
                                if value is not None:
                                    yield lineno, key, value
                                    lineno += value.count('\n')
                                    break
                            else:
                                raise RuntimeError('%r wanted to resolve '
                                                   'the token dynamically'
                                                   ' but no group matched'
                                                   % regex)
                        # normal group
                        else:
                            data = m.group(idx + 1)
                            if data or token not in ignore_if_empty:
                                yield lineno, token, data
                            lineno += data.count('\n')

                # strings as token types are just yielded as-is
                else:
                    data = m.group()
                    # update brace/parentheses balance
                    if tokens == 'operator':
                        if data == '{':
                            balancing_stack.append('}')
                        elif data == '(':
                            balancing_stack.append(')')
                        elif data == '[':
                            balancing_stack.append(']')
                        elif data in ('}', ')', ']'):
                            if not balancing_stack:
                                raise TemplateSyntaxError('unexpected \'%s\'' %
                                                          data, lineno, name,
                                                          filename)
                            expected_op = balancing_stack.pop()
                            if expected_op != data:
                                raise TemplateSyntaxError('unexpected \'%s\', '
                                                          'expected \'%s\'' %
                                                          (data, expected_op),
                                                          lineno, name,
                                                          filename)
                    # yield items
                    if data or tokens not in ignore_if_empty:
                        yield lineno, tokens, data
                    lineno += data.count('\n')

                # fetch new position into new variable so that we can check
                # if there is an internal parsing error which would result
                # in an infinite loop
                pos2 = m.end()

                # handle state changes
                if new_state is not None:
                    # remove the uppermost state
                    if new_state == '#pop':
                        stack.pop()
                    # resolve the new state by group checking
                    elif new_state == '#bygroup':
                        for key, value in iteritems(m.groupdict()):
                            if value is not None:
                                stack.append(key)
                                break
                        else:
                            raise RuntimeError('%r wanted to resolve the '
                                               'new state dynamically but'
                                               ' no group matched' %
                                               regex)
                    # direct state name given
                    else:
                        stack.append(new_state)
                    statetokens = self.rules[stack[-1]]
                # we are still at the same position and no stack change.
                # this means a loop without break condition, avoid that and
                # raise error
                elif pos2 == pos:
                    raise RuntimeError('%r yielded empty string without '
                                       'stack change' % regex)
                # publish the new position and start again
                pos = pos2
                break
            # if loop terminated without break we haven't found a single match
            # either we are at the end of the file or we have a problem
            else:
                # end of text
                if pos >= source_length:
                    return
                # something went wrong
                raise TemplateSyntaxError('unexpected char %r at %d' %
                                          (source[pos], pos), lineno,
                                          name, filename)
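

# A minimal end-to-end sketch of how this module is typically driven (the
# Environment class normally does this for you)::
#
#     from jinja2 import Environment
#     env = Environment()
#     lexer = get_lexer(env)
#     stream = lexer.tokenize(u'{% for item in seq %}{{ item }}{% endfor %}')
#     for token in stream:
#         print(token.lineno, token.type, token.value)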
    734