Home | History | Annotate | Download | only in gui
      1 # Authors: John Dennis <jdennis (at] redhat.com>
      2 #
      3 # Copyright (C) 2007 Red Hat, Inc.
      4 #
      5 # This program is free software; you can redistribute it and/or modify
      6 # it under the terms of the GNU General Public License as published by
      7 # the Free Software Foundation; either version 2 of the License, or
      8 # (at your option) any later version.
      9 #
     10 # This program is distributed in the hope that it will be useful,
     11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
     12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     13 # GNU General Public License for more details.
     14 #
     15 # You should have received a copy of the GNU General Public License
     16 # along with this program; if not, write to the Free Software
     17 # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
     18 #
     19 
     20 
# Names exported by ``from <this module> import *``: the escaping helpers
# first, then the document-building helper.
__all__ = [
    'escape_html',
    'unescape_html',
    'html_to_text',

    'html_document',
]
     28 
     29 import htmllib
     30 import formatter as Formatter
     31 import string
     32 from types import *
     33 try:
     34     from io import StringIO
     35 except ImportError:
     36     from StringIO import StringIO
     37 
     38 #------------------------------------------------------------------------------
     39 
     40 
     41 class TextWriter(Formatter.DumbWriter):
     42 
     43     def __init__(self, file=None, maxcol=80, indent_width=4):
     44         Formatter.DumbWriter.__init__(self, file, maxcol)
     45         self.indent_level = 0
     46         self.indent_width = indent_width
     47         self._set_indent()
     48 
     49     def _set_indent(self):
     50         self.indent_col = self.indent_level * self.indent_width
     51         self.indent = ' ' * self.indent_col
     52 
     53     def new_margin(self, margin, level):
     54         self.indent_level = level
     55         self._set_indent()
     56 
     57     def send_label_data(self, data):
     58         data = data + ' '
     59         if len(data) > self.indent_col:
     60             self.send_literal_data(data)
     61         else:
     62             offset = self.indent_col - len(data)
     63             self.send_literal_data(' ' * offset + data)
     64 
     65     def send_flowing_data(self, data):
     66         if not data:
     67             return
     68         atbreak = self.atbreak or data[0] in string.whitespace
     69         col = self.col
     70         maxcol = self.maxcol
     71         write = self.file.write
     72         col = self.col
     73         if col == 0:
     74             write(self.indent)
     75             col = self.indent_col
     76         for word in data.split():
     77             if atbreak:
     78                 if col + len(word) >= maxcol:
     79                     write('\n' + self.indent)
     80                     col = self.indent_col
     81                 else:
     82                     write(' ')
     83                     col = col + 1
     84             write(word)
     85             col = col + len(word)
     86             atbreak = 1
     87         self.col = col
     88         self.atbreak = data[-1] in string.whitespace
     89 
     90 
     91 class HTMLParserAnchor(htmllib.HTMLParser):
     92 
     93     def __init__(self, formatter, verbose=0):
     94         htmllib.HTMLParser.__init__(self, formatter, verbose)
     95 
     96     def anchor_bgn(self, href, name, type):
     97         self.anchor = href
     98 
     99     def anchor_end(self):
    100         if self.anchor:
    101             self.handle_data(' (%s) ' % self.anchor)
    102             self.anchor = None
    103 
    104 #------------------------------------------------------------------------------
    105 
    106 
    107 def escape_html(s):
    108     if s is None:
    109         return None
    110     s = s.replace("&", "&amp;")  # Must be done first!
    111     s = s.replace("<", "&lt;")
    112     s = s.replace(">", "&gt;")
    113     s = s.replace("'", "&apos;")
    114     s = s.replace('"', "&quot;")
    115     return s
    116 
    117 
    118 def unescape_html(s):
    119     if s is None:
    120         return None
    121     if '&' not in s:
    122         return s
    123     s = s.replace("&lt;", "<")
    124     s = s.replace("&gt;", ">")
    125     s = s.replace("&apos;", "'")
    126     s = s.replace("&quot;", '"')
    127     s = s.replace("&amp;", "&")  # Must be last
    128     return s
    129 
    130 
    131 def html_to_text(html, maxcol=80):
    132     try:
    133         buffer = StringIO()
    134         formatter = Formatter.AbstractFormatter(TextWriter(buffer, maxcol))
    135         parser = HTMLParserAnchor(formatter)
    136         parser.feed(html)
    137         parser.close()
    138         text = buffer.getvalue()
    139         buffer.close()
    140         return text
    141     except Exception as e:
    142         log_program.error('cannot convert html to text: %s' % e)
    143         return None
    144 
    145 
    146 def html_document(*body_components):
    147     '''Wrap the body components in a HTML document structure with a valid header.
    148     Accepts a variable number of arguments of of which canb be:
    149     * string
    150     * a sequences of strings (tuple or list).
    151     * a callable object taking no parameters and returning a string or sequence of strings.
    152     '''
    153     head = '<html>\n  <head>\n    <meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>\n  </head>\n  <body>\n'
    154     tail = '\n  </body>\n</html>'
    155 
    156     doc = head
    157 
    158     for body_component in body_components:
    159         if type(body_component) is StringTypes:
    160             doc += body_component
    161         elif type(body_component) in [TupleType, ListType]:
    162             for item in body_component:
    163                 doc += item
    164         elif callable(body_component):
    165             result = body_component()
    166             if type(result) in [TupleType, ListType]:
    167                 for item in result:
    168                     doc += item
    169             else:
    170                 doc += result
    171         else:
    172             doc += body_component
    173 
    174     doc += tail
    175     return doc
    176