Home | History | Annotate | Download | only in _markupsafe
      1 # -*- coding: utf-8 -*-
      2 """
      3     markupsafe
      4     ~~~~~~~~~~
      5 
      6     Implements a Markup string.
      7 
      8     :copyright: (c) 2010 by Armin Ronacher.
      9     :license: BSD, see LICENSE for more details.
     10 """
     11 import re
     12 from itertools import imap
     13 
     14 
     15 __all__ = ['Markup', 'soft_unicode', 'escape', 'escape_silent']
     16 
     17 
     18 _striptags_re = re.compile(r'(<!--.*?-->|<[^>]*>)')
     19 _entity_re = re.compile(r'&([^;]+);')
     20 
     21 
     22 class Markup(unicode):
     23     r"""Marks a string as being safe for inclusion in HTML/XML output without
     24     needing to be escaped.  This implements the `__html__` interface a couple
     25     of frameworks and web applications use.  :class:`Markup` is a direct
     26     subclass of `unicode` and provides all the methods of `unicode` just that
     27     it escapes arguments passed and always returns `Markup`.
     28 
     29     The `escape` function returns markup objects so that double escaping can't
     30     happen.
     31 
     32     The constructor of the :class:`Markup` class can be used for three
     33     different things:  When passed an unicode object it's assumed to be safe,
     34     when passed an object with an HTML representation (has an `__html__`
     35     method) that representation is used, otherwise the object passed is
     36     converted into a unicode string and then assumed to be safe:
     37 
     38     >>> Markup("Hello <em>World</em>!")
     39     Markup(u'Hello <em>World</em>!')
     40     >>> class Foo(object):
     41     ...  def __html__(self):
     42     ...   return '<a href="#">foo</a>'
     43     ... 
     44     >>> Markup(Foo())
     45     Markup(u'<a href="#">foo</a>')
     46 
     47     If you want object passed being always treated as unsafe you can use the
     48     :meth:`escape` classmethod to create a :class:`Markup` object:
     49 
     50     >>> Markup.escape("Hello <em>World</em>!")
     51     Markup(u'Hello &lt;em&gt;World&lt;/em&gt;!')
     52 
     53     Operations on a markup string are markup aware which means that all
     54     arguments are passed through the :func:`escape` function:
     55 
     56     >>> em = Markup("<em>%s</em>")
     57     >>> em % "foo & bar"
     58     Markup(u'<em>foo &amp; bar</em>')
     59     >>> strong = Markup("<strong>%(text)s</strong>")
     60     >>> strong % {'text': '<blink>hacker here</blink>'}
     61     Markup(u'<strong>&lt;blink&gt;hacker here&lt;/blink&gt;</strong>')
     62     >>> Markup("<em>Hello</em> ") + "<foo>"
     63     Markup(u'<em>Hello</em> &lt;foo&gt;')
     64     """
     65     __slots__ = ()
     66 
     67     def __new__(cls, base=u'', encoding=None, errors='strict'):
     68         if hasattr(base, '__html__'):
     69             base = base.__html__()
     70         if encoding is None:
     71             return unicode.__new__(cls, base)
     72         return unicode.__new__(cls, base, encoding, errors)
     73 
     74     def __html__(self):
     75         return self
     76 
     77     def __add__(self, other):
     78         if hasattr(other, '__html__') or isinstance(other, basestring):
     79             return self.__class__(unicode(self) + unicode(escape(other)))
     80         return NotImplemented
     81 
     82     def __radd__(self, other):
     83         if hasattr(other, '__html__') or isinstance(other, basestring):
     84             return self.__class__(unicode(escape(other)) + unicode(self))
     85         return NotImplemented
     86 
     87     def __mul__(self, num):
     88         if isinstance(num, (int, long)):
     89             return self.__class__(unicode.__mul__(self, num))
     90         return NotImplemented
     91     __rmul__ = __mul__
     92 
     93     def __mod__(self, arg):
     94         if isinstance(arg, tuple):
     95             arg = tuple(imap(_MarkupEscapeHelper, arg))
     96         else:
     97             arg = _MarkupEscapeHelper(arg)
     98         return self.__class__(unicode.__mod__(self, arg))
     99 
    100     def __repr__(self):
    101         return '%s(%s)' % (
    102             self.__class__.__name__,
    103             unicode.__repr__(self)
    104         )
    105 
    106     def join(self, seq):
    107         return self.__class__(unicode.join(self, imap(escape, seq)))
    108     join.__doc__ = unicode.join.__doc__
    109 
    110     def split(self, *args, **kwargs):
    111         return map(self.__class__, unicode.split(self, *args, **kwargs))
    112     split.__doc__ = unicode.split.__doc__
    113 
    114     def rsplit(self, *args, **kwargs):
    115         return map(self.__class__, unicode.rsplit(self, *args, **kwargs))
    116     rsplit.__doc__ = unicode.rsplit.__doc__
    117 
    118     def splitlines(self, *args, **kwargs):
    119         return map(self.__class__, unicode.splitlines(self, *args, **kwargs))
    120     splitlines.__doc__ = unicode.splitlines.__doc__
    121 
    122     def unescape(self):
    123         r"""Unescape markup again into an unicode string.  This also resolves
    124         known HTML4 and XHTML entities:
    125 
    126         >>> Markup("Main &raquo; <em>About</em>").unescape()
    127         u'Main \xbb <em>About</em>'
    128         """
    129         from jinja2._markupsafe._constants import HTML_ENTITIES
    130         def handle_match(m):
    131             name = m.group(1)
    132             if name in HTML_ENTITIES:
    133                 return unichr(HTML_ENTITIES[name])
    134             try:
    135                 if name[:2] in ('#x', '#X'):
    136                     return unichr(int(name[2:], 16))
    137                 elif name.startswith('#'):
    138                     return unichr(int(name[1:]))
    139             except ValueError:
    140                 pass
    141             return u''
    142         return _entity_re.sub(handle_match, unicode(self))
    143 
    144     def striptags(self):
    145         r"""Unescape markup into an unicode string and strip all tags.  This
    146         also resolves known HTML4 and XHTML entities.  Whitespace is
    147         normalized to one:
    148 
    149         >>> Markup("Main &raquo;  <em>About</em>").striptags()
    150         u'Main \xbb About'
    151         """
    152         stripped = u' '.join(_striptags_re.sub('', self).split())
    153         return Markup(stripped).unescape()
    154 
    155     @classmethod
    156     def escape(cls, s):
    157         """Escape the string.  Works like :func:`escape` with the difference
    158         that for subclasses of :class:`Markup` this function would return the
    159         correct subclass.
    160         """
    161         rv = escape(s)
    162         if rv.__class__ is not cls:
    163             return cls(rv)
    164         return rv
    165 
    166     def make_wrapper(name):
    167         orig = getattr(unicode, name)
    168         def func(self, *args, **kwargs):
    169             args = _escape_argspec(list(args), enumerate(args))
    170             _escape_argspec(kwargs, kwargs.iteritems())
    171             return self.__class__(orig(self, *args, **kwargs))
    172         func.__name__ = orig.__name__
    173         func.__doc__ = orig.__doc__
    174         return func
    175 
    176     for method in '__getitem__', 'capitalize', \
    177                   'title', 'lower', 'upper', 'replace', 'ljust', \
    178                   'rjust', 'lstrip', 'rstrip', 'center', 'strip', \
    179                   'translate', 'expandtabs', 'swapcase', 'zfill':
    180         locals()[method] = make_wrapper(method)
    181 
    182     # new in python 2.5
    183     if hasattr(unicode, 'partition'):
    184         partition = make_wrapper('partition'),
    185         rpartition = make_wrapper('rpartition')
    186 
    187     # new in python 2.6
    188     if hasattr(unicode, 'format'):
    189         format = make_wrapper('format')
    190 
    191     # not in python 3
    192     if hasattr(unicode, '__getslice__'):
    193         __getslice__ = make_wrapper('__getslice__')
    194 
    195     del method, make_wrapper
    196 
    197 
    198 def _escape_argspec(obj, iterable):
    199     """Helper for various string-wrapped functions."""
    200     for key, value in iterable:
    201         if hasattr(value, '__html__') or isinstance(value, basestring):
    202             obj[key] = escape(value)
    203     return obj
    204 
    205 
    206 class _MarkupEscapeHelper(object):
    207     """Helper for Markup.__mod__"""
    208 
    209     def __init__(self, obj):
    210         self.obj = obj
    211 
    212     __getitem__ = lambda s, x: _MarkupEscapeHelper(s.obj[x])
    213     __str__ = lambda s: str(escape(s.obj))
    214     __unicode__ = lambda s: unicode(escape(s.obj))
    215     __repr__ = lambda s: str(escape(repr(s.obj)))
    216     __int__ = lambda s: int(s.obj)
    217     __float__ = lambda s: float(s.obj)
    218 
    219 
    220 # we have to import it down here as the speedups and native
    221 # modules import the markup type which is defined above.
    222 try:
    223     from jinja2._markupsafe._speedups import escape, escape_silent, soft_unicode
    224 except ImportError:
    225     from jinja2._markupsafe._native import escape, escape_silent, soft_unicode
    226