Home | History | Annotate | Download | only in gui
      1 # Authors: John Dennis <jdennis@redhat.com>
      2 #
      3 # Copyright (C) 2007 Red Hat, Inc.
      4 #
      5 # This program is free software; you can redistribute it and/or modify
      6 # it under the terms of the GNU General Public License as published by
      7 # the Free Software Foundation; either version 2 of the License, or
      8 # (at your option) any later version.
      9 #
     10 # This program is distributed in the hope that it will be useful,
     11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
     12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     13 # GNU General Public License for more details.
     14 #
     15 # You should have received a copy of the GNU General Public License
     16 # along with this program; if not, write to the Free Software
     17 # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
     18 #
     19 
     20 
     21 __all__ = [
     22     'escape_html',
     23     'unescape_html',
     24     'html_to_text',
     25 
     26     'html_document',
     27 ]
     28 
     29 import htmllib
     30 import formatter as Formatter
     31 import string
     32 from types import *
     33 import StringIO
     34 
     35 #------------------------------------------------------------------------------
     36 
     37 class TextWriter(Formatter.DumbWriter):
     38     def __init__(self, file=None, maxcol=80, indent_width=4):
     39         Formatter.DumbWriter.__init__(self, file, maxcol)
     40         self.indent_level = 0
     41         self.indent_width = indent_width
     42         self._set_indent()
     43 
     44     def _set_indent(self):
     45         self.indent_col = self.indent_level * self.indent_width
     46         self.indent = ' ' * self.indent_col
     47 
     48     def new_margin(self, margin, level):
     49         self.indent_level = level
     50         self._set_indent()
     51 
     52     def send_label_data(self, data):
     53         data = data + ' '
     54         if len(data) > self.indent_col:
     55             self.send_literal_data(data)
     56         else:
     57             offset = self.indent_col - len(data)
     58             self.send_literal_data(' ' * offset + data)
     59 
     60     def send_flowing_data(self, data):
     61         if not data: return
     62         atbreak = self.atbreak or data[0] in string.whitespace
     63         col = self.col
     64         maxcol = self.maxcol
     65         write = self.file.write
     66         col = self.col
     67         if col == 0:
     68             write(self.indent)
     69             col = self.indent_col
     70         for word in data.split():
     71             if atbreak:
     72                 if col + len(word) >= maxcol:
     73                     write('\n' + self.indent)
     74                     col = self.indent_col
     75                 else:
     76                     write(' ')
     77                     col = col + 1
     78             write(word)
     79             col = col + len(word)
     80             atbreak = 1
     81         self.col = col
     82         self.atbreak = data[-1] in string.whitespace
     83 
     84 class HTMLParserAnchor(htmllib.HTMLParser):
     85 
     86     def __init__(self, formatter, verbose=0):
     87         htmllib.HTMLParser.__init__(self, formatter, verbose)
     88 
     89     def anchor_bgn(self, href, name, type):
     90         self.anchor = href
     91 
     92     def anchor_end(self):
     93         if self.anchor:
     94             self.handle_data(' (%s) ' % self.anchor)
     95             self.anchor = None
     96 
     97 #------------------------------------------------------------------------------
     98 
     99 def escape_html(s):
    100     if s is None: return None
    101     s = s.replace("&", "&amp;") # Must be done first!
    102     s = s.replace("<", "&lt;")
    103     s = s.replace(">", "&gt;")
    104     s = s.replace("'", "&apos;")
    105     s = s.replace('"', "&quot;")
    106     return s
    107 
    108 
    109 def unescape_html(s):
    110     if s is None: return None
    111     if '&' not in s:
    112         return s
    113     s = s.replace("&lt;", "<")
    114     s = s.replace("&gt;", ">")
    115     s = s.replace("&apos;", "'")
    116     s = s.replace("&quot;", '"')
    117     s = s.replace("&amp;", "&") # Must be last
    118     return s
    119 
    120 def html_to_text(html, maxcol=80):
    121     try:
    122         buffer = StringIO.StringIO()
    123         formatter = Formatter.AbstractFormatter(TextWriter(buffer, maxcol))
    124         parser = HTMLParserAnchor(formatter)
    125         parser.feed(html)
    126         parser.close()
    127         text = buffer.getvalue()
    128         buffer.close()
    129         return text
    130     except Exception, e:
    131         log_program.error('cannot convert html to text: %s' % e)
    132         return None
    133 
    134 def html_document(*body_components):
    135     '''Wrap the body components in a HTML document structure with a valid header.
    136     Accepts a variable number of arguments of of which canb be:
    137     * string
    138     * a sequences of strings (tuple or list).
    139     * a callable object taking no parameters and returning a string or sequence of strings.
    140     '''
    141     head = '<html>\n  <head>\n    <meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>\n  </head>\n  <body>\n'
    142     tail = '\n  </body>\n</html>'
    143 
    144     doc = head
    145 
    146     for body_component in body_components:
    147         if type(body_component) is StringTypes:
    148             doc += body_component
    149         elif type(body_component) in [TupleType, ListType]:
    150             for item in body_component:
    151                 doc += item
    152         elif callable(body_component):
    153             result = body_component()
    154             if type(result) in [TupleType, ListType]:
    155                 for item in result:
    156                     doc += item
    157             else:
    158                 doc += result
    159         else:
    160             doc += body_component
    161 
    162     doc += tail
    163     return doc
    164