Home | History | Annotate | Download | only in gui
      1 # Authors: John Dennis <jdennis (at] redhat.com>
      2 #
      3 # Copyright (C) 2007 Red Hat, Inc.
      4 #
      5 # This program is free software; you can redistribute it and/or modify
      6 # it under the terms of the GNU General Public License as published by
      7 # the Free Software Foundation; either version 2 of the License, or
      8 # (at your option) any later version.
      9 #
     10 # This program is distributed in the hope that it will be useful,
     11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
     12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     13 # GNU General Public License for more details.
     14 #
     15 # You should have received a copy of the GNU General Public License
     16 # along with this program; if not, write to the Free Software
     17 # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
     18 #
     19 
     20 
     21 __all__ = [
     22     'escape_html',
     23     'unescape_html',
     24     'html_to_text',
     25 
     26     'html_document',
     27 ]
     28 
     29 import htmllib
     30 import formatter as Formatter
     31 import string
     32 from types import *
     33 import StringIO
     34 
     35 #------------------------------------------------------------------------------
     36 
     37 
     38 class TextWriter(Formatter.DumbWriter):
     39 
     40     def __init__(self, file=None, maxcol=80, indent_width=4):
     41         Formatter.DumbWriter.__init__(self, file, maxcol)
     42         self.indent_level = 0
     43         self.indent_width = indent_width
     44         self._set_indent()
     45 
     46     def _set_indent(self):
     47         self.indent_col = self.indent_level * self.indent_width
     48         self.indent = ' ' * self.indent_col
     49 
     50     def new_margin(self, margin, level):
     51         self.indent_level = level
     52         self._set_indent()
     53 
     54     def send_label_data(self, data):
     55         data = data + ' '
     56         if len(data) > self.indent_col:
     57             self.send_literal_data(data)
     58         else:
     59             offset = self.indent_col - len(data)
     60             self.send_literal_data(' ' * offset + data)
     61 
     62     def send_flowing_data(self, data):
     63         if not data:
     64             return
     65         atbreak = self.atbreak or data[0] in string.whitespace
     66         col = self.col
     67         maxcol = self.maxcol
     68         write = self.file.write
     69         col = self.col
     70         if col == 0:
     71             write(self.indent)
     72             col = self.indent_col
     73         for word in data.split():
     74             if atbreak:
     75                 if col + len(word) >= maxcol:
     76                     write('\n' + self.indent)
     77                     col = self.indent_col
     78                 else:
     79                     write(' ')
     80                     col = col + 1
     81             write(word)
     82             col = col + len(word)
     83             atbreak = 1
     84         self.col = col
     85         self.atbreak = data[-1] in string.whitespace
     86 
     87 
     88 class HTMLParserAnchor(htmllib.HTMLParser):
     89 
     90     def __init__(self, formatter, verbose=0):
     91         htmllib.HTMLParser.__init__(self, formatter, verbose)
     92 
     93     def anchor_bgn(self, href, name, type):
     94         self.anchor = href
     95 
     96     def anchor_end(self):
     97         if self.anchor:
     98             self.handle_data(' (%s) ' % self.anchor)
     99             self.anchor = None
    100 
    101 #------------------------------------------------------------------------------
    102 
    103 
    104 def escape_html(s):
    105     if s is None:
    106         return None
    107     s = s.replace("&", "&amp;")  # Must be done first!
    108     s = s.replace("<", "&lt;")
    109     s = s.replace(">", "&gt;")
    110     s = s.replace("'", "&apos;")
    111     s = s.replace('"', "&quot;")
    112     return s
    113 
    114 
    115 def unescape_html(s):
    116     if s is None:
    117         return None
    118     if '&' not in s:
    119         return s
    120     s = s.replace("&lt;", "<")
    121     s = s.replace("&gt;", ">")
    122     s = s.replace("&apos;", "'")
    123     s = s.replace("&quot;", '"')
    124     s = s.replace("&amp;", "&")  # Must be last
    125     return s
    126 
    127 
    128 def html_to_text(html, maxcol=80):
    129     try:
    130         buffer = StringIO.StringIO()
    131         formatter = Formatter.AbstractFormatter(TextWriter(buffer, maxcol))
    132         parser = HTMLParserAnchor(formatter)
    133         parser.feed(html)
    134         parser.close()
    135         text = buffer.getvalue()
    136         buffer.close()
    137         return text
    138     except Exception, e:
    139         log_program.error('cannot convert html to text: %s' % e)
    140         return None
    141 
    142 
    143 def html_document(*body_components):
    144     '''Wrap the body components in a HTML document structure with a valid header.
    145     Accepts a variable number of arguments of of which canb be:
    146     * string
    147     * a sequences of strings (tuple or list).
    148     * a callable object taking no parameters and returning a string or sequence of strings.
    149     '''
    150     head = '<html>\n  <head>\n    <meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>\n  </head>\n  <body>\n'
    151     tail = '\n  </body>\n</html>'
    152 
    153     doc = head
    154 
    155     for body_component in body_components:
    156         if type(body_component) is StringTypes:
    157             doc += body_component
    158         elif type(body_component) in [TupleType, ListType]:
    159             for item in body_component:
    160                 doc += item
    161         elif callable(body_component):
    162             result = body_component()
    163             if type(result) in [TupleType, ListType]:
    164                 for item in result:
    165                     doc += item
    166             else:
    167                 doc += result
    168         else:
    169             doc += body_component
    170 
    171     doc += tail
    172     return doc
    173