Home | History | Annotate | Download | only in util
      1 # (c) 2005 Ian Bicking and contributors; written for Paste (http://pythonpaste.org)
      2 # Licensed under the MIT license: http://www.opensource.org/licenses/mit-license.php
      3 
      4 import cgi
      5 import six
      6 import re
      7 from six.moves import html_entities
      8 from six.moves.urllib.parse import quote, unquote
      9 
     10 
     11 __all__ = ['html_quote', 'html_unquote', 'url_quote', 'url_unquote',
     12            'strip_html']
     13 
     14 default_encoding = 'UTF-8'
     15 
     16 def html_quote(v, encoding=None):
     17     r"""
     18     Quote the value (turned to a string) as HTML.  This quotes <, >,
     19     and quotes:
     20     """
     21     encoding = encoding or default_encoding
     22     if v is None:
     23         return ''
     24     elif isinstance(v, six.binary_type):
     25         return cgi.escape(v, 1)
     26     elif isinstance(v, six.text_type):
     27         if six.PY3:
     28             return cgi.escape(v, 1)
     29         else:
     30             return cgi.escape(v.encode(encoding), 1)
     31     else:
     32         if six.PY3:
     33             return cgi.escape(six.text_type(v), 1)
     34         else:
     35             return cgi.escape(six.text_type(v).encode(encoding), 1)
     36 
     37 _unquote_re = re.compile(r'&([a-zA-Z]+);')
     38 def _entity_subber(match, name2c=html_entities.name2codepoint):
     39     code = name2c.get(match.group(1))
     40     if code:
     41         return six.unichr(code)
     42     else:
     43         return match.group(0)
     44 
     45 def html_unquote(s, encoding=None):
     46     r"""
     47     Decode the value.
     48 
     49     """
     50     if isinstance(s, six.binary_type):
     51         s = s.decode(encoding or default_encoding)
     52     return _unquote_re.sub(_entity_subber, s)
     53 
     54 def strip_html(s):
     55     # should this use html_unquote?
     56     s = re.sub('<.*?>', '', s)
     57     s = html_unquote(s)
     58     return s
     59 
     60 def no_quote(s):
     61     """
     62     Quoting that doesn't do anything
     63     """
     64     return s
     65 
     66 _comment_quote_re = re.compile(r'\-\s*\>')
     67 # Everything but \r, \n, \t:
     68 _bad_chars_re = re.compile('[\x00-\x08\x0b-\x0c\x0e-\x1f]')
     69 def comment_quote(s):
     70     """
     71     Quote that makes sure text can't escape a comment
     72     """
     73     comment = str(s)
     74     #comment = _bad_chars_re.sub('', comment)
     75     #print('in ', repr(str(s)))
     76     #print('out', repr(comment))
     77     comment = _comment_quote_re.sub('-&gt;', comment)
     78     return comment
     79 
     80 url_quote = quote
     81 url_unquote = unquote
     82 
     83 if __name__ == '__main__':
     84     import doctest
     85     doctest.testmod()
     86