Home | History | Annotate | Download | only in xml
      1 """
      2 A demo that reads in an RSS XML document and emits an HTML file containing
      3 a list of the individual items in the feed.
      4 """
      5 
import sys
import codecs

from xml.sax import make_parser, handler
from xml.sax.saxutils import escape
     10 
     11 # --- Templates
     12 
# Page header template.  It takes two %-substitutions: the first fills the
# <title> element and the second fills the <h1> heading.  The declared
# charset is utf-8.
top = """\
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<html>
<head>
  <title>%s</title>
  <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
</head>

<body>
<h1>%s</h1>
"""
     24 
# Page footer.  It starts by closing a <ul> element, then closes <body>
# and <html>; it takes no substitutions.
bottom = """
</ul>

<hr>
<address>
Converted to HTML by rss2html.py.
</address>

</body>
</html>
"""
     36 
     37 # --- The ContentHandler
     38 
     39 class RSSHandler(handler.ContentHandler):
     40 
     41     def __init__(self, out=sys.stdout):
     42         handler.ContentHandler.__init__(self)
     43         self._out = codecs.getwriter('utf-8')(out)
     44 
     45         self._text = ""
     46         self._parent = None
     47         self._list_started = False
     48         self._title = None
     49         self._link = None
     50         self._descr = ""
     51 
     52     # ContentHandler methods
     53 
     54     def startElement(self, name, attrs):
     55         if name == "channel" or name == "image" or name == "item":
     56             self._parent = name
     57 
     58         self._text = ""
     59 
     60     def endElement(self, name):
     61         if self._parent == "channel":
     62             if name == "title":
     63                 self._out.write(top % (self._text, self._text))
     64             elif name == "description":
     65                 self._out.write("<p>%s</p>\n" % self._text)
     66 
     67         elif self._parent == "item":
     68             if name == "title":
     69                 self._title = self._text
     70             elif name == "link":
     71                 self._link = self._text
     72             elif name == "description":
     73                 self._descr = self._text
     74             elif name == "item":
     75                 if not self._list_started:
     76                     self._out.write("<ul>\n")
     77                     self._list_started = True
     78 
     79                 self._out.write('  <li><a href="%s">%s</a> %s\n' %
     80                                 (self._link, self._title, self._descr))
     81 
     82                 self._title = None
     83                 self._link = None
     84                 self._descr = ""
     85 
     86         if name == "rss":
     87             self._out.write(bottom)
     88 
     89     def characters(self, content):
     90         self._text = self._text + content
     91 
     92 # --- Main program
     93 
     94 if __name__ == '__main__':
     95     parser = make_parser()
     96     parser.setContentHandler(RSSHandler())
     97     parser.parse(sys.argv[1])
     98