# Source code for abstract_api.web_scraping.web_scraping
from typing import ClassVar, Optional
from ..core.bases import BaseService
from .web_scraping_response import WebScrapingResponse
[docs]
class WebScraping(BaseService[WebScrapingResponse]):
    """Client for the AbstractAPI web scraping service.

    Extracts data from a given URL by delegating to the request machinery
    inherited from BaseService.

    Attributes:
        _subdomain: Subdomain of the web scraping service.
        _service_name_env_var: Service identifier string; presumably used by
            BaseService to look up configuration from the environment
            (confirm against BaseService).
    """

    _subdomain = "scrape"
    _service_name_env_var: ClassVar[str] = "WEB_SCRAPING"

    def scrape(
        self,
        url: str,
        render_js: Optional[bool] = None,
        block_ads: Optional[bool] = None,
        proxy_country: Optional[str] = None
    ) -> WebScrapingResponse:
        """Scrape the given URL and return the parsed API response.

        Args:
            url: Target URL to extract data from. It should carry the full
                protocol prefix (http:// or https://). A URL that contains
                parameters should be encoded, e.g. the & character
                becomes %26.
            render_js: When True, Javascript on the target site is rendered
                (via a headless Google Chrome browser). Forwarded as None
                when not given; the service is expected to treat that as
                False.
            block_ads: When True, advertisements that can be identified on
                the target site are blocked. Forwarded as None when not
                given; the service is expected to treat that as False.
            proxy_country: Country to issue the request from, given as a
                two letter ISO 3166-1 alpha-2 code.

        Returns:
            WebScrapingResponse representing the API call response.
        """
        # Collect the query parameters in one place; values are forwarded
        # untouched (including None) and in the same order as the signature.
        request_params = {
            "url": url,
            "render_js": render_js,
            "block_ads": block_ads,
            "proxy_country": proxy_country,
        }
        return self._service_request(
            _response_class=WebScrapingResponse,
            **request_params
        )