Home | History | Annotate | Download | only in jinja2
      1 # -*- coding: utf-8 -*-
      2 """
      3     jinja2.ext
      4     ~~~~~~~~~~
      5 
      6     Jinja extensions allow to add custom tags similar to the way django custom
      7     tags work.  By default two example extensions exist: an i18n and a cache
      8     extension.
      9 
     10     :copyright: (c) 2017 by the Jinja Team.
     11     :license: BSD.
     12 """
     13 import re
     14 
     15 from jinja2 import nodes
     16 from jinja2.defaults import BLOCK_START_STRING, \
     17      BLOCK_END_STRING, VARIABLE_START_STRING, VARIABLE_END_STRING, \
     18      COMMENT_START_STRING, COMMENT_END_STRING, LINE_STATEMENT_PREFIX, \
     19      LINE_COMMENT_PREFIX, TRIM_BLOCKS, NEWLINE_SEQUENCE, \
     20      KEEP_TRAILING_NEWLINE, LSTRIP_BLOCKS
     21 from jinja2.environment import Environment
     22 from jinja2.runtime import concat
     23 from jinja2.exceptions import TemplateAssertionError, TemplateSyntaxError
     24 from jinja2.utils import contextfunction, import_string, Markup
     25 from jinja2._compat import with_metaclass, string_types, iteritems
     26 
     27 
     28 # the only real useful gettext functions for a Jinja template.  Note
     29 # that ugettext must be assigned to gettext as Jinja doesn't support
     30 # non unicode strings.
     31 GETTEXT_FUNCTIONS = ('_', 'gettext', 'ngettext')
     32 
     33 
     34 class ExtensionRegistry(type):
     35     """Gives the extension an unique identifier."""
     36 
     37     def __new__(cls, name, bases, d):
     38         rv = type.__new__(cls, name, bases, d)
     39         rv.identifier = rv.__module__ + '.' + rv.__name__
     40         return rv
     41 
     42 
     43 class Extension(with_metaclass(ExtensionRegistry, object)):
     44     """Extensions can be used to add extra functionality to the Jinja template
     45     system at the parser level.  Custom extensions are bound to an environment
     46     but may not store environment specific data on `self`.  The reason for
     47     this is that an extension can be bound to another environment (for
     48     overlays) by creating a copy and reassigning the `environment` attribute.
     49 
     50     As extensions are created by the environment they cannot accept any
     51     arguments for configuration.  One may want to work around that by using
     52     a factory function, but that is not possible as extensions are identified
     53     by their import name.  The correct way to configure the extension is
     54     storing the configuration values on the environment.  Because this way the
     55     environment ends up acting as central configuration storage the
     56     attributes may clash which is why extensions have to ensure that the names
     57     they choose for configuration are not too generic.  ``prefix`` for example
     58     is a terrible name, ``fragment_cache_prefix`` on the other hand is a good
     59     name as includes the name of the extension (fragment cache).
     60     """
     61 
     62     #: if this extension parses this is the list of tags it's listening to.
     63     tags = set()
     64 
     65     #: the priority of that extension.  This is especially useful for
     66     #: extensions that preprocess values.  A lower value means higher
     67     #: priority.
     68     #:
     69     #: .. versionadded:: 2.4
     70     priority = 100
     71 
     72     def __init__(self, environment):
     73         self.environment = environment
     74 
     75     def bind(self, environment):
     76         """Create a copy of this extension bound to another environment."""
     77         rv = object.__new__(self.__class__)
     78         rv.__dict__.update(self.__dict__)
     79         rv.environment = environment
     80         return rv
     81 
     82     def preprocess(self, source, name, filename=None):
     83         """This method is called before the actual lexing and can be used to
     84         preprocess the source.  The `filename` is optional.  The return value
     85         must be the preprocessed source.
     86         """
     87         return source
     88 
     89     def filter_stream(self, stream):
     90         """It's passed a :class:`~jinja2.lexer.TokenStream` that can be used
     91         to filter tokens returned.  This method has to return an iterable of
     92         :class:`~jinja2.lexer.Token`\\s, but it doesn't have to return a
     93         :class:`~jinja2.lexer.TokenStream`.
     94 
     95         In the `ext` folder of the Jinja2 source distribution there is a file
     96         called `inlinegettext.py` which implements a filter that utilizes this
     97         method.
     98         """
     99         return stream
    100 
    101     def parse(self, parser):
    102         """If any of the :attr:`tags` matched this method is called with the
    103         parser as first argument.  The token the parser stream is pointing at
    104         is the name token that matched.  This method has to return one or a
    105         list of multiple nodes.
    106         """
    107         raise NotImplementedError()
    108 
    109     def attr(self, name, lineno=None):
    110         """Return an attribute node for the current extension.  This is useful
    111         to pass constants on extensions to generated template code.
    112 
    113         ::
    114 
    115             self.attr('_my_attribute', lineno=lineno)
    116         """
    117         return nodes.ExtensionAttribute(self.identifier, name, lineno=lineno)
    118 
    119     def call_method(self, name, args=None, kwargs=None, dyn_args=None,
    120                     dyn_kwargs=None, lineno=None):
    121         """Call a method of the extension.  This is a shortcut for
    122         :meth:`attr` + :class:`jinja2.nodes.Call`.
    123         """
    124         if args is None:
    125             args = []
    126         if kwargs is None:
    127             kwargs = []
    128         return nodes.Call(self.attr(name, lineno=lineno), args, kwargs,
    129                           dyn_args, dyn_kwargs, lineno=lineno)
    130 
    131 
    132 @contextfunction
    133 def _gettext_alias(__context, *args, **kwargs):
    134     return __context.call(__context.resolve('gettext'), *args, **kwargs)
    135 
    136 
    137 def _make_new_gettext(func):
    138     @contextfunction
    139     def gettext(__context, __string, **variables):
    140         rv = __context.call(func, __string)
    141         if __context.eval_ctx.autoescape:
    142             rv = Markup(rv)
    143         return rv % variables
    144     return gettext
    145 
    146 
    147 def _make_new_ngettext(func):
    148     @contextfunction
    149     def ngettext(__context, __singular, __plural, __num, **variables):
    150         variables.setdefault('num', __num)
    151         rv = __context.call(func, __singular, __plural, __num)
    152         if __context.eval_ctx.autoescape:
    153             rv = Markup(rv)
    154         return rv % variables
    155     return ngettext
    156 
    157 
    158 class InternationalizationExtension(Extension):
    159     """This extension adds gettext support to Jinja2."""
    160     tags = set(['trans'])
    161 
    162     # TODO: the i18n extension is currently reevaluating values in a few
    163     # situations.  Take this example:
    164     #   {% trans count=something() %}{{ count }} foo{% pluralize
    165     #     %}{{ count }} fooss{% endtrans %}
    166     # something is called twice here.  One time for the gettext value and
    167     # the other time for the n-parameter of the ngettext function.
    168 
    169     def __init__(self, environment):
    170         Extension.__init__(self, environment)
    171         environment.globals['_'] = _gettext_alias
    172         environment.extend(
    173             install_gettext_translations=self._install,
    174             install_null_translations=self._install_null,
    175             install_gettext_callables=self._install_callables,
    176             uninstall_gettext_translations=self._uninstall,
    177             extract_translations=self._extract,
    178             newstyle_gettext=False
    179         )
    180 
    181     def _install(self, translations, newstyle=None):
    182         gettext = getattr(translations, 'ugettext', None)
    183         if gettext is None:
    184             gettext = translations.gettext
    185         ngettext = getattr(translations, 'ungettext', None)
    186         if ngettext is None:
    187             ngettext = translations.ngettext
    188         self._install_callables(gettext, ngettext, newstyle)
    189 
    190     def _install_null(self, newstyle=None):
    191         self._install_callables(
    192             lambda x: x,
    193             lambda s, p, n: (n != 1 and (p,) or (s,))[0],
    194             newstyle
    195         )
    196 
    197     def _install_callables(self, gettext, ngettext, newstyle=None):
    198         if newstyle is not None:
    199             self.environment.newstyle_gettext = newstyle
    200         if self.environment.newstyle_gettext:
    201             gettext = _make_new_gettext(gettext)
    202             ngettext = _make_new_ngettext(ngettext)
    203         self.environment.globals.update(
    204             gettext=gettext,
    205             ngettext=ngettext
    206         )
    207 
    208     def _uninstall(self, translations):
    209         for key in 'gettext', 'ngettext':
    210             self.environment.globals.pop(key, None)
    211 
    212     def _extract(self, source, gettext_functions=GETTEXT_FUNCTIONS):
    213         if isinstance(source, string_types):
    214             source = self.environment.parse(source)
    215         return extract_from_ast(source, gettext_functions)
    216 
    217     def parse(self, parser):
    218         """Parse a translatable tag."""
    219         lineno = next(parser.stream).lineno
    220         num_called_num = False
    221 
    222         # find all the variables referenced.  Additionally a variable can be
    223         # defined in the body of the trans block too, but this is checked at
    224         # a later state.
    225         plural_expr = None
    226         plural_expr_assignment = None
    227         variables = {}
    228         trimmed = None
    229         while parser.stream.current.type != 'block_end':
    230             if variables:
    231                 parser.stream.expect('comma')
    232 
    233             # skip colon for python compatibility
    234             if parser.stream.skip_if('colon'):
    235                 break
    236 
    237             name = parser.stream.expect('name')
    238             if name.value in variables:
    239                 parser.fail('translatable variable %r defined twice.' %
    240                             name.value, name.lineno,
    241                             exc=TemplateAssertionError)
    242 
    243             # expressions
    244             if parser.stream.current.type == 'assign':
    245                 next(parser.stream)
    246                 variables[name.value] = var = parser.parse_expression()
    247             elif trimmed is None and name.value in ('trimmed', 'notrimmed'):
    248                 trimmed = name.value == 'trimmed'
    249                 continue
    250             else:
    251                 variables[name.value] = var = nodes.Name(name.value, 'load')
    252 
    253             if plural_expr is None:
    254                 if isinstance(var, nodes.Call):
    255                     plural_expr = nodes.Name('_trans', 'load')
    256                     variables[name.value] = plural_expr
    257                     plural_expr_assignment = nodes.Assign(
    258                         nodes.Name('_trans', 'store'), var)
    259                 else:
    260                     plural_expr = var
    261                 num_called_num = name.value == 'num'
    262 
    263         parser.stream.expect('block_end')
    264 
    265         plural = None
    266         have_plural = False
    267         referenced = set()
    268 
    269         # now parse until endtrans or pluralize
    270         singular_names, singular = self._parse_block(parser, True)
    271         if singular_names:
    272             referenced.update(singular_names)
    273             if plural_expr is None:
    274                 plural_expr = nodes.Name(singular_names[0], 'load')
    275                 num_called_num = singular_names[0] == 'num'
    276 
    277         # if we have a pluralize block, we parse that too
    278         if parser.stream.current.test('name:pluralize'):
    279             have_plural = True
    280             next(parser.stream)
    281             if parser.stream.current.type != 'block_end':
    282                 name = parser.stream.expect('name')
    283                 if name.value not in variables:
    284                     parser.fail('unknown variable %r for pluralization' %
    285                                 name.value, name.lineno,
    286                                 exc=TemplateAssertionError)
    287                 plural_expr = variables[name.value]
    288                 num_called_num = name.value == 'num'
    289             parser.stream.expect('block_end')
    290             plural_names, plural = self._parse_block(parser, False)
    291             next(parser.stream)
    292             referenced.update(plural_names)
    293         else:
    294             next(parser.stream)
    295 
    296         # register free names as simple name expressions
    297         for var in referenced:
    298             if var not in variables:
    299                 variables[var] = nodes.Name(var, 'load')
    300 
    301         if not have_plural:
    302             plural_expr = None
    303         elif plural_expr is None:
    304             parser.fail('pluralize without variables', lineno)
    305 
    306         if trimmed is None:
    307             trimmed = self.environment.policies['ext.i18n.trimmed']
    308         if trimmed:
    309             singular = self._trim_whitespace(singular)
    310             if plural:
    311                 plural = self._trim_whitespace(plural)
    312 
    313         node = self._make_node(singular, plural, variables, plural_expr,
    314                                bool(referenced),
    315                                num_called_num and have_plural)
    316         node.set_lineno(lineno)
    317         if plural_expr_assignment is not None:
    318             return [plural_expr_assignment, node]
    319         else:
    320             return node
    321 
    322     def _trim_whitespace(self, string, _ws_re=re.compile(r'\s*\n\s*')):
    323         return _ws_re.sub(' ', string.strip())
    324 
    325     def _parse_block(self, parser, allow_pluralize):
    326         """Parse until the next block tag with a given name."""
    327         referenced = []
    328         buf = []
    329         while 1:
    330             if parser.stream.current.type == 'data':
    331                 buf.append(parser.stream.current.value.replace('%', '%%'))
    332                 next(parser.stream)
    333             elif parser.stream.current.type == 'variable_begin':
    334                 next(parser.stream)
    335                 name = parser.stream.expect('name').value
    336                 referenced.append(name)
    337                 buf.append('%%(%s)s' % name)
    338                 parser.stream.expect('variable_end')
    339             elif parser.stream.current.type == 'block_begin':
    340                 next(parser.stream)
    341                 if parser.stream.current.test('name:endtrans'):
    342                     break
    343                 elif parser.stream.current.test('name:pluralize'):
    344                     if allow_pluralize:
    345                         break
    346                     parser.fail('a translatable section can have only one '
    347                                 'pluralize section')
    348                 parser.fail('control structures in translatable sections are '
    349                             'not allowed')
    350             elif parser.stream.eos:
    351                 parser.fail('unclosed translation block')
    352             else:
    353                 assert False, 'internal parser error'
    354 
    355         return referenced, concat(buf)
    356 
    357     def _make_node(self, singular, plural, variables, plural_expr,
    358                    vars_referenced, num_called_num):
    359         """Generates a useful node from the data provided."""
    360         # no variables referenced?  no need to escape for old style
    361         # gettext invocations only if there are vars.
    362         if not vars_referenced and not self.environment.newstyle_gettext:
    363             singular = singular.replace('%%', '%')
    364             if plural:
    365                 plural = plural.replace('%%', '%')
    366 
    367         # singular only:
    368         if plural_expr is None:
    369             gettext = nodes.Name('gettext', 'load')
    370             node = nodes.Call(gettext, [nodes.Const(singular)],
    371                               [], None, None)
    372 
    373         # singular and plural
    374         else:
    375             ngettext = nodes.Name('ngettext', 'load')
    376             node = nodes.Call(ngettext, [
    377                 nodes.Const(singular),
    378                 nodes.Const(plural),
    379                 plural_expr
    380             ], [], None, None)
    381 
    382         # in case newstyle gettext is used, the method is powerful
    383         # enough to handle the variable expansion and autoescape
    384         # handling itself
    385         if self.environment.newstyle_gettext:
    386             for key, value in iteritems(variables):
    387                 # the function adds that later anyways in case num was
    388                 # called num, so just skip it.
    389                 if num_called_num and key == 'num':
    390                     continue
    391                 node.kwargs.append(nodes.Keyword(key, value))
    392 
    393         # otherwise do that here
    394         else:
    395             # mark the return value as safe if we are in an
    396             # environment with autoescaping turned on
    397             node = nodes.MarkSafeIfAutoescape(node)
    398             if variables:
    399                 node = nodes.Mod(node, nodes.Dict([
    400                     nodes.Pair(nodes.Const(key), value)
    401                     for key, value in variables.items()
    402                 ]))
    403         return nodes.Output([node])
    404 
    405 
    406 class ExprStmtExtension(Extension):
    407     """Adds a `do` tag to Jinja2 that works like the print statement just
    408     that it doesn't print the return value.
    409     """
    410     tags = set(['do'])
    411 
    412     def parse(self, parser):
    413         node = nodes.ExprStmt(lineno=next(parser.stream).lineno)
    414         node.node = parser.parse_tuple()
    415         return node
    416 
    417 
    418 class LoopControlExtension(Extension):
    419     """Adds break and continue to the template engine."""
    420     tags = set(['break', 'continue'])
    421 
    422     def parse(self, parser):
    423         token = next(parser.stream)
    424         if token.value == 'break':
    425             return nodes.Break(lineno=token.lineno)
    426         return nodes.Continue(lineno=token.lineno)
    427 
    428 
    429 class WithExtension(Extension):
    430     pass
    431 
    432 
    433 class AutoEscapeExtension(Extension):
    434     pass
    435 
    436 
    437 def extract_from_ast(node, gettext_functions=GETTEXT_FUNCTIONS,
    438                      babel_style=True):
    439     """Extract localizable strings from the given template node.  Per
    440     default this function returns matches in babel style that means non string
    441     parameters as well as keyword arguments are returned as `None`.  This
    442     allows Babel to figure out what you really meant if you are using
    443     gettext functions that allow keyword arguments for placeholder expansion.
    444     If you don't want that behavior set the `babel_style` parameter to `False`
    445     which causes only strings to be returned and parameters are always stored
    446     in tuples.  As a consequence invalid gettext calls (calls without a single
    447     string parameter or string parameters after non-string parameters) are
    448     skipped.
    449 
    450     This example explains the behavior:
    451 
    452     >>> from jinja2 import Environment
    453     >>> env = Environment()
    454     >>> node = env.parse('{{ (_("foo"), _(), ngettext("foo", "bar", 42)) }}')
    455     >>> list(extract_from_ast(node))
    456     [(1, '_', 'foo'), (1, '_', ()), (1, 'ngettext', ('foo', 'bar', None))]
    457     >>> list(extract_from_ast(node, babel_style=False))
    458     [(1, '_', ('foo',)), (1, 'ngettext', ('foo', 'bar'))]
    459 
    460     For every string found this function yields a ``(lineno, function,
    461     message)`` tuple, where:
    462 
    463     * ``lineno`` is the number of the line on which the string was found,
    464     * ``function`` is the name of the ``gettext`` function used (if the
    465       string was extracted from embedded Python code), and
    466     *  ``message`` is the string itself (a ``unicode`` object, or a tuple
    467        of ``unicode`` objects for functions with multiple string arguments).
    468 
    469     This extraction function operates on the AST and is because of that unable
    470     to extract any comments.  For comment support you have to use the babel
    471     extraction interface or extract comments yourself.
    472     """
    473     for node in node.find_all(nodes.Call):
    474         if not isinstance(node.node, nodes.Name) or \
    475            node.node.name not in gettext_functions:
    476             continue
    477 
    478         strings = []
    479         for arg in node.args:
    480             if isinstance(arg, nodes.Const) and \
    481                isinstance(arg.value, string_types):
    482                 strings.append(arg.value)
    483             else:
    484                 strings.append(None)
    485 
    486         for arg in node.kwargs:
    487             strings.append(None)
    488         if node.dyn_args is not None:
    489             strings.append(None)
    490         if node.dyn_kwargs is not None:
    491             strings.append(None)
    492 
    493         if not babel_style:
    494             strings = tuple(x for x in strings if x is not None)
    495             if not strings:
    496                 continue
    497         else:
    498             if len(strings) == 1:
    499                 strings = strings[0]
    500             else:
    501                 strings = tuple(strings)
    502         yield node.lineno, node.node.name, strings
    503 
    504 
    505 class _CommentFinder(object):
    506     """Helper class to find comments in a token stream.  Can only
    507     find comments for gettext calls forwards.  Once the comment
    508     from line 4 is found, a comment for line 1 will not return a
    509     usable value.
    510     """
    511 
    512     def __init__(self, tokens, comment_tags):
    513         self.tokens = tokens
    514         self.comment_tags = comment_tags
    515         self.offset = 0
    516         self.last_lineno = 0
    517 
    518     def find_backwards(self, offset):
    519         try:
    520             for _, token_type, token_value in \
    521                     reversed(self.tokens[self.offset:offset]):
    522                 if token_type in ('comment', 'linecomment'):
    523                     try:
    524                         prefix, comment = token_value.split(None, 1)
    525                     except ValueError:
    526                         continue
    527                     if prefix in self.comment_tags:
    528                         return [comment.rstrip()]
    529             return []
    530         finally:
    531             self.offset = offset
    532 
    533     def find_comments(self, lineno):
    534         if not self.comment_tags or self.last_lineno > lineno:
    535             return []
    536         for idx, (token_lineno, _, _) in enumerate(self.tokens[self.offset:]):
    537             if token_lineno > lineno:
    538                 return self.find_backwards(self.offset + idx)
    539         return self.find_backwards(len(self.tokens))
    540 
    541 
    542 def babel_extract(fileobj, keywords, comment_tags, options):
    543     """Babel extraction method for Jinja templates.
    544 
    545     .. versionchanged:: 2.3
    546        Basic support for translation comments was added.  If `comment_tags`
    547        is now set to a list of keywords for extraction, the extractor will
    548        try to find the best preceeding comment that begins with one of the
    549        keywords.  For best results, make sure to not have more than one
    550        gettext call in one line of code and the matching comment in the
    551        same line or the line before.
    552 
    553     .. versionchanged:: 2.5.1
    554        The `newstyle_gettext` flag can be set to `True` to enable newstyle
    555        gettext calls.
    556 
    557     .. versionchanged:: 2.7
    558        A `silent` option can now be provided.  If set to `False` template
    559        syntax errors are propagated instead of being ignored.
    560 
    561     :param fileobj: the file-like object the messages should be extracted from
    562     :param keywords: a list of keywords (i.e. function names) that should be
    563                      recognized as translation functions
    564     :param comment_tags: a list of translator tags to search for and include
    565                          in the results.
    566     :param options: a dictionary of additional options (optional)
    567     :return: an iterator over ``(lineno, funcname, message, comments)`` tuples.
    568              (comments will be empty currently)
    569     """
    570     extensions = set()
    571     for extension in options.get('extensions', '').split(','):
    572         extension = extension.strip()
    573         if not extension:
    574             continue
    575         extensions.add(import_string(extension))
    576     if InternationalizationExtension not in extensions:
    577         extensions.add(InternationalizationExtension)
    578 
    579     def getbool(options, key, default=False):
    580         return options.get(key, str(default)).lower() in \
    581             ('1', 'on', 'yes', 'true')
    582 
    583     silent = getbool(options, 'silent', True)
    584     environment = Environment(
    585         options.get('block_start_string', BLOCK_START_STRING),
    586         options.get('block_end_string', BLOCK_END_STRING),
    587         options.get('variable_start_string', VARIABLE_START_STRING),
    588         options.get('variable_end_string', VARIABLE_END_STRING),
    589         options.get('comment_start_string', COMMENT_START_STRING),
    590         options.get('comment_end_string', COMMENT_END_STRING),
    591         options.get('line_statement_prefix') or LINE_STATEMENT_PREFIX,
    592         options.get('line_comment_prefix') or LINE_COMMENT_PREFIX,
    593         getbool(options, 'trim_blocks', TRIM_BLOCKS),
    594         getbool(options, 'lstrip_blocks', LSTRIP_BLOCKS),
    595         NEWLINE_SEQUENCE,
    596         getbool(options, 'keep_trailing_newline', KEEP_TRAILING_NEWLINE),
    597         frozenset(extensions),
    598         cache_size=0,
    599         auto_reload=False
    600     )
    601 
    602     if getbool(options, 'trimmed'):
    603         environment.policies['ext.i18n.trimmed'] = True
    604     if getbool(options, 'newstyle_gettext'):
    605         environment.newstyle_gettext = True
    606 
    607     source = fileobj.read().decode(options.get('encoding', 'utf-8'))
    608     try:
    609         node = environment.parse(source)
    610         tokens = list(environment.lex(environment.preprocess(source)))
    611     except TemplateSyntaxError as e:
    612         if not silent:
    613             raise
    614         # skip templates with syntax errors
    615         return
    616 
    617     finder = _CommentFinder(tokens, comment_tags)
    618     for lineno, func, message in extract_from_ast(node, keywords):
    619         yield lineno, func, message, finder.find_comments(lineno)
    620 
    621 
    622 #: nicer import names
    623 i18n = InternationalizationExtension
    624 do = ExprStmtExtension
    625 loopcontrols = LoopControlExtension
    626 with_ = WithExtension
    627 autoescape = AutoEscapeExtension
    628