Home | History | Annotate | Download | only in markupsafe
      1 # -*- coding: utf-8 -*-
      2 """
      3     markupsafe
      4     ~~~~~~~~~~
      5 
      6     Implements a Markup string.
      7 
      8     :copyright: (c) 2010 by Armin Ronacher.
      9     :license: BSD, see LICENSE for more details.
     10 """
     11 import re
     12 from markupsafe._compat import text_type, string_types, int_types, \
     13      unichr, PY2
     14 
     15 
     16 __all__ = ['Markup', 'soft_unicode', 'escape', 'escape_silent']
     17 
     18 
     19 _striptags_re = re.compile(r'(<!--.*?-->|<[^>]*>)')
     20 _entity_re = re.compile(r'&([^;]+);')
     21 
     22 
     23 class Markup(text_type):
     24     r"""Marks a string as being safe for inclusion in HTML/XML output without
     25     needing to be escaped.  This implements the `__html__` interface a couple
     26     of frameworks and web applications use.  :class:`Markup` is a direct
     27     subclass of `unicode` and provides all the methods of `unicode` just that
     28     it escapes arguments passed and always returns `Markup`.
     29 
     30     The `escape` function returns markup objects so that double escaping can't
     31     happen.
     32 
     33     The constructor of the :class:`Markup` class can be used for three
     34     different things:  When passed an unicode object it's assumed to be safe,
     35     when passed an object with an HTML representation (has an `__html__`
     36     method) that representation is used, otherwise the object passed is
     37     converted into a unicode string and then assumed to be safe:
     38 
     39     >>> Markup("Hello <em>World</em>!")
     40     Markup(u'Hello <em>World</em>!')
     41     >>> class Foo(object):
     42     ...  def __html__(self):
     43     ...   return '<a href="#">foo</a>'
     44     ... 
     45     >>> Markup(Foo())
     46     Markup(u'<a href="#">foo</a>')
     47 
     48     If you want object passed being always treated as unsafe you can use the
     49     :meth:`escape` classmethod to create a :class:`Markup` object:
     50 
     51     >>> Markup.escape("Hello <em>World</em>!")
     52     Markup(u'Hello &lt;em&gt;World&lt;/em&gt;!')
     53 
     54     Operations on a markup string are markup aware which means that all
     55     arguments are passed through the :func:`escape` function:
     56 
     57     >>> em = Markup("<em>%s</em>")
     58     >>> em % "foo & bar"
     59     Markup(u'<em>foo &amp; bar</em>')
     60     >>> strong = Markup("<strong>%(text)s</strong>")
     61     >>> strong % {'text': '<blink>hacker here</blink>'}
     62     Markup(u'<strong>&lt;blink&gt;hacker here&lt;/blink&gt;</strong>')
     63     >>> Markup("<em>Hello</em> ") + "<foo>"
     64     Markup(u'<em>Hello</em> &lt;foo&gt;')
     65     """
     66     __slots__ = ()
     67 
     68     def __new__(cls, base=u'', encoding=None, errors='strict'):
     69         if hasattr(base, '__html__'):
     70             base = base.__html__()
     71         if encoding is None:
     72             return text_type.__new__(cls, base)
     73         return text_type.__new__(cls, base, encoding, errors)
     74 
     75     def __html__(self):
     76         return self
     77 
     78     def __add__(self, other):
     79         if isinstance(other, string_types) or hasattr(other, '__html__'):
     80             return self.__class__(super(Markup, self).__add__(self.escape(other)))
     81         return NotImplemented
     82 
     83     def __radd__(self, other):
     84         if hasattr(other, '__html__') or isinstance(other, string_types):
     85             return self.escape(other).__add__(self)
     86         return NotImplemented
     87 
     88     def __mul__(self, num):
     89         if isinstance(num, int_types):
     90             return self.__class__(text_type.__mul__(self, num))
     91         return NotImplemented
     92     __rmul__ = __mul__
     93 
     94     def __mod__(self, arg):
     95         if isinstance(arg, tuple):
     96             arg = tuple(_MarkupEscapeHelper(x, self.escape) for x in arg)
     97         else:
     98             arg = _MarkupEscapeHelper(arg, self.escape)
     99         return self.__class__(text_type.__mod__(self, arg))
    100 
    101     def __repr__(self):
    102         return '%s(%s)' % (
    103             self.__class__.__name__,
    104             text_type.__repr__(self)
    105         )
    106 
    107     def join(self, seq):
    108         return self.__class__(text_type.join(self, map(self.escape, seq)))
    109     join.__doc__ = text_type.join.__doc__
    110 
    111     def split(self, *args, **kwargs):
    112         return list(map(self.__class__, text_type.split(self, *args, **kwargs)))
    113     split.__doc__ = text_type.split.__doc__
    114 
    115     def rsplit(self, *args, **kwargs):
    116         return list(map(self.__class__, text_type.rsplit(self, *args, **kwargs)))
    117     rsplit.__doc__ = text_type.rsplit.__doc__
    118 
    119     def splitlines(self, *args, **kwargs):
    120         return list(map(self.__class__, text_type.splitlines(self, *args, **kwargs)))
    121     splitlines.__doc__ = text_type.splitlines.__doc__
    122 
    123     def unescape(self):
    124         r"""Unescape markup again into an text_type string.  This also resolves
    125         known HTML4 and XHTML entities:
    126 
    127         >>> Markup("Main &raquo; <em>About</em>").unescape()
    128         u'Main \xbb <em>About</em>'
    129         """
    130         from markupsafe._constants import HTML_ENTITIES
    131         def handle_match(m):
    132             name = m.group(1)
    133             if name in HTML_ENTITIES:
    134                 return unichr(HTML_ENTITIES[name])
    135             try:
    136                 if name[:2] in ('#x', '#X'):
    137                     return unichr(int(name[2:], 16))
    138                 elif name.startswith('#'):
    139                     return unichr(int(name[1:]))
    140             except ValueError:
    141                 pass
    142             return u''
    143         return _entity_re.sub(handle_match, text_type(self))
    144 
    145     def striptags(self):
    146         r"""Unescape markup into an text_type string and strip all tags.  This
    147         also resolves known HTML4 and XHTML entities.  Whitespace is
    148         normalized to one:
    149 
    150         >>> Markup("Main &raquo;  <em>About</em>").striptags()
    151         u'Main \xbb About'
    152         """
    153         stripped = u' '.join(_striptags_re.sub('', self).split())
    154         return Markup(stripped).unescape()
    155 
    156     @classmethod
    157     def escape(cls, s):
    158         """Escape the string.  Works like :func:`escape` with the difference
    159         that for subclasses of :class:`Markup` this function would return the
    160         correct subclass.
    161         """
    162         rv = escape(s)
    163         if rv.__class__ is not cls:
    164             return cls(rv)
    165         return rv
    166 
    167     def make_wrapper(name):
    168         orig = getattr(text_type, name)
    169         def func(self, *args, **kwargs):
    170             args = _escape_argspec(list(args), enumerate(args), self.escape)
    171             #_escape_argspec(kwargs, kwargs.iteritems(), None)
    172             return self.__class__(orig(self, *args, **kwargs))
    173         func.__name__ = orig.__name__
    174         func.__doc__ = orig.__doc__
    175         return func
    176 
    177     for method in '__getitem__', 'capitalize', \
    178                   'title', 'lower', 'upper', 'replace', 'ljust', \
    179                   'rjust', 'lstrip', 'rstrip', 'center', 'strip', \
    180                   'translate', 'expandtabs', 'swapcase', 'zfill':
    181         locals()[method] = make_wrapper(method)
    182 
    183     # new in python 2.5
    184     if hasattr(text_type, 'partition'):
    185         def partition(self, sep):
    186             return tuple(map(self.__class__,
    187                              text_type.partition(self, self.escape(sep))))
    188         def rpartition(self, sep):
    189             return tuple(map(self.__class__,
    190                              text_type.rpartition(self, self.escape(sep))))
    191 
    192     # new in python 2.6
    193     if hasattr(text_type, 'format'):
    194         format = make_wrapper('format')
    195 
    196     # not in python 3
    197     if hasattr(text_type, '__getslice__'):
    198         __getslice__ = make_wrapper('__getslice__')
    199 
    200     del method, make_wrapper
    201 
    202 
    203 def _escape_argspec(obj, iterable, escape):
    204     """Helper for various string-wrapped functions."""
    205     for key, value in iterable:
    206         if hasattr(value, '__html__') or isinstance(value, string_types):
    207             obj[key] = escape(value)
    208     return obj
    209 
    210 
    211 class _MarkupEscapeHelper(object):
    212     """Helper for Markup.__mod__"""
    213 
    214     def __init__(self, obj, escape):
    215         self.obj = obj
    216         self.escape = escape
    217 
    218     __getitem__ = lambda s, x: _MarkupEscapeHelper(s.obj[x], s.escape)
    219     __unicode__ = __str__ = lambda s: text_type(s.escape(s.obj))
    220     __repr__ = lambda s: str(s.escape(repr(s.obj)))
    221     __int__ = lambda s: int(s.obj)
    222     __float__ = lambda s: float(s.obj)
    223 
    224 
# We have to import these down here because the speedups and native
# modules import the Markup type, which is defined above.
    227 try:
    228     from markupsafe._speedups import escape, escape_silent, soft_unicode
    229 except ImportError:
    230     from markupsafe._native import escape, escape_silent, soft_unicode
    231 
    232 if not PY2:
    233     soft_str = soft_unicode
    234     __all__.append('soft_str')
    235