# (code-browser navigation residue) Home | History | Annotate | Download | only in tests
      1 """Tests of the builder registry."""
      2 
      3 import unittest
      4 
      5 from bs4 import BeautifulSoup
      6 from bs4.builder import (
      7     builder_registry as registry,
      8     HTMLParserTreeBuilder,
      9     TreeBuilderRegistry,
     10 )
     11 
     12 try:
     13     from bs4.builder import HTML5TreeBuilder
     14     HTML5LIB_PRESENT = True
     15 except ImportError:
     16     HTML5LIB_PRESENT = False
     17 
     18 try:
     19     from bs4.builder import (
     20         LXMLTreeBuilderForXML,
     21         LXMLTreeBuilder,
     22         )
     23     LXML_PRESENT = True
     24 except ImportError:
     25     LXML_PRESENT = False
     26 
     27 
     28 class BuiltInRegistryTest(unittest.TestCase):
     29     """Test the built-in registry with the default builders registered."""
     30 
     31     def test_combination(self):
     32         if LXML_PRESENT:
     33             self.assertEqual(registry.lookup('fast', 'html'),
     34                              LXMLTreeBuilder)
     35 
     36         if LXML_PRESENT:
     37             self.assertEqual(registry.lookup('permissive', 'xml'),
     38                              LXMLTreeBuilderForXML)
     39         self.assertEqual(registry.lookup('strict', 'html'),
     40                           HTMLParserTreeBuilder)
     41         if HTML5LIB_PRESENT:
     42             self.assertEqual(registry.lookup('html5lib', 'html'),
     43                               HTML5TreeBuilder)
     44 
     45     def test_lookup_by_markup_type(self):
     46         if LXML_PRESENT:
     47             self.assertEqual(registry.lookup('html'), LXMLTreeBuilder)
     48             self.assertEqual(registry.lookup('xml'), LXMLTreeBuilderForXML)
     49         else:
     50             self.assertEqual(registry.lookup('xml'), None)
     51             if HTML5LIB_PRESENT:
     52                 self.assertEqual(registry.lookup('html'), HTML5TreeBuilder)
     53             else:
     54                 self.assertEqual(registry.lookup('html'), HTMLParserTreeBuilder)
     55 
     56     def test_named_library(self):
     57         if LXML_PRESENT:
     58             self.assertEqual(registry.lookup('lxml', 'xml'),
     59                              LXMLTreeBuilderForXML)
     60             self.assertEqual(registry.lookup('lxml', 'html'),
     61                              LXMLTreeBuilder)
     62         if HTML5LIB_PRESENT:
     63             self.assertEqual(registry.lookup('html5lib'),
     64                               HTML5TreeBuilder)
     65 
     66         self.assertEqual(registry.lookup('html.parser'),
     67                           HTMLParserTreeBuilder)
     68 
     69     def test_beautifulsoup_constructor_does_lookup(self):
     70         # You can pass in a string.
     71         BeautifulSoup("", features="html")
     72         # Or a list of strings.
     73         BeautifulSoup("", features=["html", "fast"])
     74 
     75         # You'll get an exception if BS can't find an appropriate
     76         # builder.
     77         self.assertRaises(ValueError, BeautifulSoup,
     78                           "", features="no-such-feature")
     79 
     80 class RegistryTest(unittest.TestCase):
     81     """Test the TreeBuilderRegistry class in general."""
     82 
     83     def setUp(self):
     84         self.registry = TreeBuilderRegistry()
     85 
     86     def builder_for_features(self, *feature_list):
     87         cls = type('Builder_' + '_'.join(feature_list),
     88                    (object,), {'features' : feature_list})
     89 
     90         self.registry.register(cls)
     91         return cls
     92 
     93     def test_register_with_no_features(self):
     94         builder = self.builder_for_features()
     95 
     96         # Since the builder advertises no features, you can't find it
     97         # by looking up features.
     98         self.assertEqual(self.registry.lookup('foo'), None)
     99 
    100         # But you can find it by doing a lookup with no features, if
    101         # this happens to be the only registered builder.
    102         self.assertEqual(self.registry.lookup(), builder)
    103 
    104     def test_register_with_features_makes_lookup_succeed(self):
    105         builder = self.builder_for_features('foo', 'bar')
    106         self.assertEqual(self.registry.lookup('foo'), builder)
    107         self.assertEqual(self.registry.lookup('bar'), builder)
    108 
    109     def test_lookup_fails_when_no_builder_implements_feature(self):
    110         builder = self.builder_for_features('foo', 'bar')
    111         self.assertEqual(self.registry.lookup('baz'), None)
    112 
    113     def test_lookup_gets_most_recent_registration_when_no_feature_specified(self):
    114         builder1 = self.builder_for_features('foo')
    115         builder2 = self.builder_for_features('bar')
    116         self.assertEqual(self.registry.lookup(), builder2)
    117 
    118     def test_lookup_fails_when_no_tree_builders_registered(self):
    119         self.assertEqual(self.registry.lookup(), None)
    120 
    121     def test_lookup_gets_most_recent_builder_supporting_all_features(self):
    122         has_one = self.builder_for_features('foo')
    123         has_the_other = self.builder_for_features('bar')
    124         has_both_early = self.builder_for_features('foo', 'bar', 'baz')
    125         has_both_late = self.builder_for_features('foo', 'bar', 'quux')
    126         lacks_one = self.builder_for_features('bar')
    127         has_the_other = self.builder_for_features('foo')
    128 
    129         # There are two builders featuring 'foo' and 'bar', but
    130         # the one that also features 'quux' was registered later.
    131         self.assertEqual(self.registry.lookup('foo', 'bar'),
    132                           has_both_late)
    133 
    134         # There is only one builder featuring 'foo', 'bar', and 'baz'.
    135         self.assertEqual(self.registry.lookup('foo', 'bar', 'baz'),
    136                           has_both_early)
    137 
    138     def test_lookup_fails_when_cannot_reconcile_requested_features(self):
    139         builder1 = self.builder_for_features('foo', 'bar')
    140         builder2 = self.builder_for_features('foo', 'baz')
    141         self.assertEqual(self.registry.lookup('bar', 'baz'), None)
    142