import formatter
import unittest

from test import test_support
htmllib = test_support.import_module('htmllib', deprecated=True)


class AnchorCollector(htmllib.HTMLParser):
    """HTMLParser subclass that records the arguments of each anchor_bgn() call."""

    def __init__(self, *args, **kw):
        # The record list is created before the base initializer runs.
        # The double-underscore name is mangled per class, so it cannot
        # collide with an attribute of the base parser.
        self.__seen_anchors = []
        htmllib.HTMLParser.__init__(self, *args, **kw)

    def get_anchor_info(self):
        """Return the list of argument tuples recorded so far, in call order."""
        return self.__seen_anchors

    def anchor_bgn(self, *args):
        """Record the positional arguments of this call as one tuple."""
        self.__seen_anchors.append(args)


class DeclCollector(htmllib.HTMLParser):
    """HTMLParser subclass that records the data of each unknown_decl() call."""

    def __init__(self, *args, **kw):
        # Same pattern as AnchorCollector: create the private record list
        # first, then hand all arguments to the base initializer untouched.
        self.__seen_decls = []
        htmllib.HTMLParser.__init__(self, *args, **kw)

    def get_decl_info(self):
        """Return the list of declaration strings recorded so far, in call order."""
        return self.__seen_decls

    def unknown_decl(self, data):
        """Record *data* as received."""
        self.__seen_decls.append(data)


class HTMLParserTestCase(unittest.TestCase):
    """Regression tests for htmllib.HTMLParser callbacks."""

    # NOTE: the test methods carry comments rather than docstrings on
    # purpose; unittest prints a test's docstring in verbose mode, so adding
    # one would change the reported test names.

    def test_anchor_collection(self):
        # See SF bug #467059.
        # Three anchors: href + name, href only, name only.  Each one must
        # be reported as a 3-tuple with '' standing in for missing values.
        markup = (
            """<a href='http://foo.org/' name='splat'> </a>
            <a href='http://www.python.org/'> </a>
            <a name='frob'> </a>
            """)
        expected = [
            ('http://foo.org/', 'splat', ''),
            ('http://www.python.org/', '', ''),
            ('', 'frob', ''),
        ]

        collector = AnchorCollector(formatter.NullFormatter(), verbose=1)
        collector.feed(markup)
        collector.close()

        self.assertEqual(collector.get_anchor_info(), expected)

    def test_decl_collection(self):
        # See SF patch #545300
        # The "<![...]>" constructs must reach unknown_decl() with the
        # surrounding "<![" and "]>" stripped.
        markup = (
            """<html>
<body>
hallo
<![if !supportEmptyParas]> <![endif]>
</body>
</html>
            """)
        expected = ["if !supportEmptyParas", "endif"]

        collector = DeclCollector(formatter.NullFormatter(), verbose=1)
        collector.feed(markup)
        collector.close()

        self.assertEqual(collector.get_decl_info(), expected)


def test_main():
    """Run this module's test case through the test_support runner."""
    test_support.run_unittest(HTMLParserTestCase)


if __name__ == "__main__":
    test_main()