"""Tests of the builder registry."""

import pytest
import warnings
from typing import Type

from bs4 import BeautifulSoup
from bs4.builder import (
    builder_registry as registry,
    TreeBuilder,
    TreeBuilderRegistry,
)
from bs4.builder._htmlparser import HTMLParserTreeBuilder

from . import (
    HTML5LIB_PRESENT,
    LXML_PRESENT,
)

if HTML5LIB_PRESENT:
    from bs4.builder._html5lib import HTML5TreeBuilder

if LXML_PRESENT:
    from bs4.builder._lxml import (
        LXMLTreeBuilderForXML,
        LXMLTreeBuilder,
    )


# TODO: Split out the lxml and html5lib tests into their own classes
# and gate with pytest.mark.skipIf.
class TestBuiltInRegistry:
    """Test the built-in registry with the default builders registered."""

    def test_combination(self):
        """Looking up several features at once yields the expected builder."""
        assert registry.lookup("strict", "html") == HTMLParserTreeBuilder
        if LXML_PRESENT:
            assert registry.lookup("fast", "html") == LXMLTreeBuilder
            assert registry.lookup("permissive", "xml") == LXMLTreeBuilderForXML
        if HTML5LIB_PRESENT:
            assert registry.lookup("html5lib", "html") == HTML5TreeBuilder

    def test_lookup_by_markup_type(self):
        """Looking up a bare markup type depends on which parsers are present."""
        if LXML_PRESENT:
            assert registry.lookup("html") == LXMLTreeBuilder
            assert registry.lookup("xml") == LXMLTreeBuilderForXML
        else:
            # Without lxml, nothing is expected to handle XML.
            assert registry.lookup("xml") is None
            if HTML5LIB_PRESENT:
                assert registry.lookup("html") == HTML5TreeBuilder
            else:
                assert registry.lookup("html") == HTMLParserTreeBuilder

    def test_named_library(self):
        """A builder can be looked up by the name of its parser library."""
        if LXML_PRESENT:
            assert registry.lookup("lxml", "xml") == LXMLTreeBuilderForXML
            assert registry.lookup("lxml", "html") == LXMLTreeBuilder
        if HTML5LIB_PRESENT:
            assert registry.lookup("html5lib") == HTML5TreeBuilder

        assert registry.lookup("html.parser") == HTMLParserTreeBuilder

    def test_beautifulsoup_constructor_does_lookup(self):
        """The BeautifulSoup constructor resolves ``features`` to a builder."""
        with warnings.catch_warnings(record=True):
            # This will create a warning about not explicitly
            # specifying a parser, but we'll ignore it.

            # You can pass in a string.
            BeautifulSoup("", features="html")

            # Or a list of strings. Elsewhere in this module the "fast"
            # feature is only expected to resolve (to the lxml builder)
            # when lxml is installed, so only request it in that case;
            # otherwise the constructor would raise instead of building.
            if LXML_PRESENT:
                BeautifulSoup("", features=["html", "fast"])

        # You'll get an exception if BS can't find an appropriate
        # builder.
        with pytest.raises(ValueError):
            BeautifulSoup("", features="no-such-feature")


class TestRegistry:
    """Test the TreeBuilderRegistry class in general."""

    def setup_method(self):
        """Give every test its own empty registry."""
        self.registry = TreeBuilderRegistry()

    def builder_for_features(self, *feature_list: str) -> Type[TreeBuilder]:
        """Create a stand-in builder class and register it.

        :param feature_list: The features the new class advertises
            through its ``features`` attribute.
        :return: The newly created (and registered) class.
        """
        # NOTE: the stand-in derives from ``object``, not ``TreeBuilder``;
        # it only carries the ``features`` attribute set here.
        cls = type(
            "Builder_" + "_".join(feature_list),
            (object,),
            {"features": feature_list},
        )

        self.registry.register(cls)
        return cls

    def test_register_with_no_features(self):
        """A featureless builder is found only by a featureless lookup."""
        builder = self.builder_for_features()

        # Since the builder advertises no features, you can't find it
        # by looking up features.
        assert self.registry.lookup("foo") is None

        # But you can find it by doing a lookup with no features, if
        # this happens to be the only registered builder.
        assert self.registry.lookup() == builder

    def test_register_with_features_makes_lookup_succeed(self):
        """A builder can be found through any feature it advertises."""
        builder = self.builder_for_features("foo", "bar")
        assert self.registry.lookup("foo") is builder
        assert self.registry.lookup("bar") is builder

    def test_lookup_fails_when_no_builder_implements_feature(self):
        """Looking up an unadvertised feature returns None."""
        assert self.registry.lookup("baz") is None

    def test_lookup_gets_most_recent_registration_when_no_feature_specified(self):
        """A featureless lookup returns the most recently registered builder."""
        self.builder_for_features("foo")
        builder2 = self.builder_for_features("bar")
        assert self.registry.lookup() == builder2

    def test_lookup_fails_when_no_tree_builders_registered(self):
        """A featureless lookup on an empty registry returns None."""
        assert self.registry.lookup() is None

    def test_lookup_gets_most_recent_builder_supporting_all_features(self):
        """Among builders with every requested feature, the latest wins."""
        self.builder_for_features("foo")
        self.builder_for_features("bar")
        has_both_early = self.builder_for_features("foo", "bar", "baz")
        has_both_late = self.builder_for_features("foo", "bar", "quux")
        self.builder_for_features("bar")
        self.builder_for_features("foo")

        # There are two builders featuring 'foo' and 'bar', but
        # the one that also features 'quux' was registered later.
        assert self.registry.lookup("foo", "bar") == has_both_late

        # There is only one builder featuring 'foo', 'bar', and 'baz'.
        assert self.registry.lookup("foo", "bar", "baz") == has_both_early

    def test_lookup_fails_when_cannot_reconcile_requested_features(self):
        """No builder is returned unless one has all requested features."""
        self.builder_for_features("foo", "bar")
        self.builder_for_features("foo", "baz")
        assert self.registry.lookup("bar", "baz") is None