"""
A demo that reads in an RSS XML document and emits an HTML file containing
a list of the individual items in the feed.
"""

import sys
import codecs
import io

from xml.sax import make_parser, handler
from xml.sax.saxutils import escape

# --- Templates

top = """\
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<html>
<head>
<title>%s</title>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
</head>

<body>
<h1>%s</h1>
"""

bottom = """
</ul>

<hr>
<address>
Converted to HTML by rss2html.py.
</address>

</body>
</html>
"""

# --- Helpers


def _utf8_writer(out):
    """Return an object whose write() accepts str and sends it to *out*.

    Binary streams are wrapped in a UTF-8 encoding writer.  Text streams that
    sit on top of a binary buffer (such as sys.stdout) are bypassed in favour
    of that buffer, so the bytes really are UTF-8 as the generated <meta> tag
    promises.  Pure text sinks (such as io.StringIO) have no byte layer and
    are returned unchanged.
    """
    if isinstance(out, io.TextIOBase):
        raw = getattr(out, "buffer", None)
        if raw is None:
            return out
        return codecs.getwriter('utf-8')(raw)
    return codecs.getwriter('utf-8')(out)


def _escape_attr(value):
    """Escape *value* for use inside a double-quoted HTML attribute."""
    return escape(value, {'"': "&quot;"})


# --- The ContentHandler

class RSSHandler(handler.ContentHandler):
    """SAX handler that writes an HTML rendering of an RSS feed.

    The channel title becomes the page title and heading, the channel
    description becomes a paragraph, and every item becomes a list entry
    linking to the item.  Output is written incrementally to *out*, which may
    be a binary stream or a text stream.
    """

    def __init__(self, out=sys.stdout):
        handler.ContentHandler.__init__(self)
        self._out = _utf8_writer(out)

        # Character data collected since the most recent start tag.
        self._text = ""
        # Names of the elements that are currently open, outermost first.
        self._path = []
        # True once the opening <ul> has been written.
        self._list_started = False
        # Fields of the item currently being read.
        self._title = None
        self._link = None
        self._descr = ""

    # ContentHandler methods

    def startElement(self, name, attrs):
        """Record the newly opened element and start collecting its text."""
        self._path.append(name)
        self._text = ""

    def endElement(self, name):
        """Emit or store the text of the element that just closed."""
        if self._path:
            self._path.pop()
        # Only direct children of <channel> and <item> are of interest; this
        # keeps e.g. <image><title> from being taken for the channel title.
        parent = self._path[-1] if self._path else None
        text = self._text

        if parent == "channel":
            if name == "title":
                heading = escape(text)
                self._out.write(top % (heading, heading))
            elif name == "description":
                # NOTE: RSS descriptions conventionally carry HTML markup, so
                # they are passed through unescaped.  Sanitise them before
                # using this on feeds that are not trusted.
                self._out.write("<p>%s</p>\n" % text)

        elif parent == "item":
            if name == "title":
                self._title = text
            elif name == "link":
                self._link = text
            elif name == "description":
                self._descr = text

        if name == "item":
            self._write_item()
        elif name == "rss":
            if not self._list_started:
                # The bottom template closes the list, so open it first to
                # keep the markup balanced for feeds without any items.
                self._out.write("<ul>\n")
                self._list_started = True
            self._out.write(bottom)

    def characters(self, content):
        """Accumulate character data; the parser may deliver it in pieces."""
        self._text = self._text + content

    # Internal methods

    def _write_item(self):
        """Write the list entry for the collected item and reset its fields."""
        if not self._list_started:
            self._out.write("<ul>\n")
            self._list_started = True

        # Pretty-printed feeds often surround the URL with whitespace.
        link = self._link.strip() if self._link else ""
        # An item without a title is labelled with its link instead.
        title = self._title if self._title is not None else link

        if link:
            self._out.write(' <li><a href="%s">%s</a> %s\n' %
                            (_escape_attr(link), escape(title), self._descr))
        else:
            # No usable link: emit plain text rather than a dead anchor.
            self._out.write(' <li>%s %s\n' % (escape(title), self._descr))

        self._title = None
        self._link = None
        self._descr = ""


# --- Main program

def main(argv=None):
    """Convert the RSS file named on the command line; return an exit status."""
    if argv is None:
        argv = sys.argv
    if len(argv) != 2:
        sys.stderr.write("usage: rss2html.py RSS-FILE\n")
        return 2

    parser = make_parser()
    parser.setContentHandler(RSSHandler())
    parser.parse(argv[1])
    return 0


if __name__ == '__main__':
    sys.exit(main())