"""Integration code for CSS selectors using `Soup Sieve `_ (pypi: ``soupsieve``). Acquire a `CSS` object through the `element.Tag.css` attribute of the starting point of your CSS selector, or (if you want to run a selector against the entire document) of the `BeautifulSoup` object itself. The main advantage of doing this instead of using ``soupsieve`` functions is that you don't need to keep passing the `element.Tag` to be selected against, since the `CSS` object is permanently scoped to that `element.Tag`. """ from __future__ import annotations from types import ModuleType from typing import ( Any, cast, Iterable, Iterator, Optional, TYPE_CHECKING, ) import warnings from bs4._typing import _NamespaceMapping if TYPE_CHECKING: from soupsieve import SoupSieve from bs4 import element from bs4.element import ResultSet, Tag soupsieve: Optional[ModuleType] try: import soupsieve except ImportError: soupsieve = None warnings.warn( "The soupsieve package is not installed. CSS selectors cannot be used." ) class CSS(object): """A proxy object against the ``soupsieve`` library, to simplify its CSS selector API. You don't need to instantiate this class yourself; instead, use `element.Tag.css`. :param tag: All CSS selectors run by this object will use this as their starting point. :param api: An optional drop-in replacement for the ``soupsieve`` module, intended for use in unit tests. """ def __init__(self, tag: element.Tag, api: Optional[ModuleType] = None): if api is None: api = soupsieve if api is None: raise NotImplementedError( "Cannot execute CSS selectors because the soupsieve package is not installed." ) self.api = api self.tag = tag def escape(self, ident: str) -> str: """Escape a CSS identifier. This is a simple wrapper around `soupsieve.escape() `_. See the documentation for that function for more information. """ if soupsieve is None: raise NotImplementedError( "Cannot escape CSS identifiers because the soupsieve package is not installed." ) return cast(str, self.api.escape(ident)) def _ns( self, ns: Optional[_NamespaceMapping], select: str ) -> Optional[_NamespaceMapping]: """Normalize a dictionary of namespaces.""" if not isinstance(select, self.api.SoupSieve) and ns is None: # If the selector is a precompiled pattern, it already has # a namespace context compiled in, which cannot be # replaced. ns = self.tag._namespaces return ns def _rs(self, results: Iterable[Tag]) -> ResultSet[Tag]: """Normalize a list of results to a py:class:`ResultSet`. A py:class:`ResultSet` is more consistent with the rest of Beautiful Soup's API, and :py:meth:`ResultSet.__getattr__` has a helpful error message if you try to treat a list of results as a single result (a common mistake). """ # Import here to avoid circular import from bs4 import ResultSet return ResultSet(None, results) def compile( self, select: str, namespaces: Optional[_NamespaceMapping] = None, flags: int = 0, **kwargs: Any, ) -> SoupSieve: """Pre-compile a selector and return the compiled object. :param selector: A CSS selector. :param namespaces: A dictionary mapping namespace prefixes used in the CSS selector to namespace URIs. By default, Beautiful Soup will use the prefixes it encountered while parsing the document. :param flags: Flags to be passed into Soup Sieve's `soupsieve.compile() `_ method. :param kwargs: Keyword arguments to be passed into Soup Sieve's `soupsieve.compile() `_ method. :return: A precompiled selector object. :rtype: soupsieve.SoupSieve """ return self.api.compile(select, self._ns(namespaces, select), flags, **kwargs) def select_one( self, select: str, namespaces: Optional[_NamespaceMapping] = None, flags: int = 0, **kwargs: Any, ) -> element.Tag | None: """Perform a CSS selection operation on the current Tag and return the first result, if any. This uses the Soup Sieve library. For more information, see that library's documentation for the `soupsieve.select_one() `_ method. :param selector: A CSS selector. :param namespaces: A dictionary mapping namespace prefixes used in the CSS selector to namespace URIs. By default, Beautiful Soup will use the prefixes it encountered while parsing the document. :param flags: Flags to be passed into Soup Sieve's `soupsieve.select_one() `_ method. :param kwargs: Keyword arguments to be passed into Soup Sieve's `soupsieve.select_one() `_ method. """ return self.api.select_one( select, self.tag, self._ns(namespaces, select), flags, **kwargs ) def select( self, select: str, namespaces: Optional[_NamespaceMapping] = None, limit: int = 0, flags: int = 0, **kwargs: Any, ) -> ResultSet[element.Tag]: """Perform a CSS selection operation on the current `element.Tag`. This uses the Soup Sieve library. For more information, see that library's documentation for the `soupsieve.select() `_ method. :param selector: A CSS selector. :param namespaces: A dictionary mapping namespace prefixes used in the CSS selector to namespace URIs. By default, Beautiful Soup will pass in the prefixes it encountered while parsing the document. :param limit: After finding this number of results, stop looking. :param flags: Flags to be passed into Soup Sieve's `soupsieve.select() `_ method. :param kwargs: Keyword arguments to be passed into Soup Sieve's `soupsieve.select() `_ method. """ if limit is None: limit = 0 return self._rs( self.api.select( select, self.tag, self._ns(namespaces, select), limit, flags, **kwargs ) ) def iselect( self, select: str, namespaces: Optional[_NamespaceMapping] = None, limit: int = 0, flags: int = 0, **kwargs: Any, ) -> Iterator[element.Tag]: """Perform a CSS selection operation on the current `element.Tag`. This uses the Soup Sieve library. For more information, see that library's documentation for the `soupsieve.iselect() `_ method. It is the same as select(), but it returns a generator instead of a list. :param selector: A string containing a CSS selector. :param namespaces: A dictionary mapping namespace prefixes used in the CSS selector to namespace URIs. By default, Beautiful Soup will pass in the prefixes it encountered while parsing the document. :param limit: After finding this number of results, stop looking. :param flags: Flags to be passed into Soup Sieve's `soupsieve.iselect() `_ method. :param kwargs: Keyword arguments to be passed into Soup Sieve's `soupsieve.iselect() `_ method. """ return self.api.iselect( select, self.tag, self._ns(namespaces, select), limit, flags, **kwargs ) def closest( self, select: str, namespaces: Optional[_NamespaceMapping] = None, flags: int = 0, **kwargs: Any, ) -> Optional[element.Tag]: """Find the `element.Tag` closest to this one that matches the given selector. This uses the Soup Sieve library. For more information, see that library's documentation for the `soupsieve.closest() `_ method. :param selector: A string containing a CSS selector. :param namespaces: A dictionary mapping namespace prefixes used in the CSS selector to namespace URIs. By default, Beautiful Soup will pass in the prefixes it encountered while parsing the document. :param flags: Flags to be passed into Soup Sieve's `soupsieve.closest() `_ method. :param kwargs: Keyword arguments to be passed into Soup Sieve's `soupsieve.closest() `_ method. """ return self.api.closest( select, self.tag, self._ns(namespaces, select), flags, **kwargs ) def match( self, select: str, namespaces: Optional[_NamespaceMapping] = None, flags: int = 0, **kwargs: Any, ) -> bool: """Check whether or not this `element.Tag` matches the given CSS selector. This uses the Soup Sieve library. For more information, see that library's documentation for the `soupsieve.match() `_ method. :param: a CSS selector. :param namespaces: A dictionary mapping namespace prefixes used in the CSS selector to namespace URIs. By default, Beautiful Soup will pass in the prefixes it encountered while parsing the document. :param flags: Flags to be passed into Soup Sieve's `soupsieve.match() `_ method. :param kwargs: Keyword arguments to be passed into SoupSieve's `soupsieve.match() `_ method. """ return cast( bool, self.api.match( select, self.tag, self._ns(namespaces, select), flags, **kwargs ), ) def filter( self, select: str, namespaces: Optional[_NamespaceMapping] = None, flags: int = 0, **kwargs: Any, ) -> ResultSet[element.Tag]: """Filter this `element.Tag`'s direct children based on the given CSS selector. This uses the Soup Sieve library. It works the same way as passing a `element.Tag` into that library's `soupsieve.filter() `_ method. For more information, see the documentation for `soupsieve.filter() `_. :param namespaces: A dictionary mapping namespace prefixes used in the CSS selector to namespace URIs. By default, Beautiful Soup will pass in the prefixes it encountered while parsing the document. :param flags: Flags to be passed into Soup Sieve's `soupsieve.filter() `_ method. :param kwargs: Keyword arguments to be passed into SoupSieve's `soupsieve.filter() `_ method. """ return self._rs( self.api.filter( select, self.tag, self._ns(namespaces, select), flags, **kwargs ) )