# Source code for abstract_api.web_scraping.web_scraping

from typing import ClassVar, Optional

from ..core.bases import BaseService
from .web_scraping_response import WebScrapingResponse



# [docs]
class WebScraping(BaseService[WebScrapingResponse]):
    """AbstractAPI web scraping service.

    Used to extract data from a given URL.

    Attributes:
        _subdomain: Web scraping service subdomain.
        _service_name_env_var: Service name identifier for this service.
    """
    _subdomain = "scrape"
    # NOTE(review): presumably read by BaseService to build the name of an
    # environment variable for this service -- the consumer is not visible
    # in this module, confirm against BaseService.
    _service_name_env_var: ClassVar[str] = "WEB_SCRAPING"

    def scrape(
        self,
        url: str,
        render_js: Optional[bool] = None,
        block_ads: Optional[bool] = None,
        proxy_country: Optional[str] = None
    ) -> WebScrapingResponse:
        """Extracts data from the given URL.

        Every argument is forwarded unchanged, as a keyword argument, to
        ``self._service_request``; this method performs no validation or
        encoding of its own.

        Args:
            url: The URL to extract the data from. Note that this parameter
                should include the full HTTP protocol (http:// or https://).
                If your URL has parameters, you should encode it.
                For example the & character would be encoded to %26.
            render_js: If True the request will render JavaScript on the
                target site. Note that JavaScript is rendered via a Google
                Chrome headless browser. Defaults to False.
            block_ads: If True the request will block any advertisements it
                can identify on the target site. Defaults to False.
            proxy_country: The country to make the request from.
                The country should be submitted as a two-letter
                ISO 3166-1 alpha-2 code.

        Returns:
            WebScrapingResponse representing API call response.
        """
        # NOTE(review): the signature defaults are None, not False; the
        # "Defaults to False" wording above presumably describes the remote
        # API's behaviour when a parameter is not supplied -- confirm how
        # _service_request treats None values.
        return self._service_request(
            _response_class=WebScrapingResponse,
            url=url,
            render_js=render_js,
            block_ads=block_ads,
            proxy_country=proxy_country
        )