Spaces:
Sleeping
Sleeping
| from __future__ import annotations | |
| from typing import AsyncIterator, Awaitable, ClassVar | |
| from playwright.async_api import ( # noqa: F401 | |
| Browser, | |
| BrowserContext, | |
| Page, | |
| TimeoutError, | |
| async_playwright, | |
| ) | |
| from .models import GetContentModel, PageModel, ScreenshotModel # noqa: TCH001 | |
class AsyncMixin:
    """Experimental mixin that lets an instance be used with ``await``.

    Subclasses override :meth:`__ainit__` with their asynchronous set-up;
    ``await instance`` then runs that coroutine and yields its result.
    """

    async def __ainit__(self) -> None:
        """Asynchronous initialisation hook; the base version does nothing."""
        return None

    def __await__(self) -> AsyncIterator[Awaitable]:
        """Delegate ``await self`` to the coroutine built by ``__ainit__``."""
        init_coroutine = self.__ainit__()
        return init_coroutine.__await__()
class PlaywrightInstance(AsyncMixin):
    """Keep one Playwright Firefox browser open so API requests can reuse it.

    Instances are awaitable: ``instance = await PlaywrightInstance()`` starts
    Playwright and launches the browser. Call :meth:`close_instance` to shut
    both down again.
    """

    HEADERS = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"  # noqa: E501
    FIREFOX_USER_PREFS: ClassVar[dict[str, bool | int | str]] = {
        "extensions.enabledScopes": 1,
        "extensions.autoDisableScopes": 1,
        "dom.webdriver.enabled": False,
        "useAutomationExtension": False,
        "general.useragent.override": HEADERS,
    }

    def __init__(self) -> None:
        """Create an instance with no Playwright driver or browser yet."""
        # NOTE(review): ``async_playwright`` is the factory function, not a
        # type; the value stored here is whatever ``async_playwright().start()``
        # returns. The annotation is kept because no other name is imported.
        self.playwright: async_playwright | None = None
        self.browser: Browser | None = None

    async def __ainit__(self) -> PlaywrightInstance:
        """Start Playwright and launch Firefox if they are not running yet.

        Each resource is checked on its own, so a previous attempt that
        started Playwright but failed to launch the browser is completed on
        the next ``await`` instead of leaving ``self.browser`` as ``None``.

        Returns:
            PlaywrightInstance: ``self``, so ``await PlaywrightInstance()``
            evaluates to the ready instance.
        """
        if self.playwright is None:
            self.playwright = await async_playwright().start()
        if self.browser is None:
            self.browser = await self.playwright.firefox.launch(
                firefox_user_prefs=self.FIREFOX_USER_PREFS,
            )
        return self

    async def new_context_page(
        self,
        browser: Browser,
        screenshot_model: GetContentModel,
        page_model: PageModel,
    ) -> tuple[BrowserContext | None, Page]:
        """Open a page, either directly on the browser or in a new context.

        Parameters:
            browser (Browser):
                The Playwright Browser instance to create the page in.
            screenshot_model (GetContentModel):
                Request configuration; only ``new_browser`` is read here.
            page_model (PageModel):
                Page configuration (colour scheme, JavaScript, viewport, proxy).

        Returns:
            tuple: ``(context, page)``. ``context`` is ``None`` when
            ``screenshot_model.new_browser`` is falsy and the page was created
            with ``browser.new_page``; otherwise it is the newly created
            BrowserContext that owns the page.
        """
        proxy = page_model.proxy.model_dump() if page_model.proxy else None
        viewport = page_model.viewport.model_dump() if page_model.viewport else None
        params = {
            "color_scheme": page_model.color_scheme,
            "java_script_enabled": page_model.java_script_enabled,
            "no_viewport": page_model.no_viewport,
            "proxy": proxy,
            "viewport": viewport,
        }
        if not screenshot_model.new_browser:
            return None, await browser.new_page(**params)

        new_context = await browser.new_context(**params)
        try:
            page = await new_context.new_page()
        except Exception:
            # Do not leave an orphaned context behind when the page cannot
            # be created.
            await new_context.close()
            raise
        return new_context, page

    async def screenshot(
        self,
        screenshot_model: ScreenshotModel,
        page_model: PageModel,
    ) -> bytes:
        """Take a screenshot of a webpage url.

        Parameters:
            screenshot_model (ScreenshotModel):
                Screenshot configuration: ``url``, ``ms_delay``,
                ``query_selector``, ``full_page``, ``image_type`` and
                ``new_browser`` are read.
            page_model (PageModel):
                Page configuration forwarded to :meth:`new_context_page`.

        Returns:
            bytes: The screenshot data in bytes.

        Raises:
            RuntimeError: If the instance has not been awaited yet, so no
                browser is available.
        """
        if self.browser is None:
            msg = "Browser is not started; await the PlaywrightInstance first."
            raise RuntimeError(msg)

        context, page = await self.new_context_page(
            screenshot_model=screenshot_model,
            browser=self.browser,
            page_model=page_model,
        )
        try:
            await page.goto(str(screenshot_model.url))
            await page.wait_for_timeout(screenshot_model.ms_delay)

            screenshot_data: bytes
            if screenshot_model.query_selector:
                # Capture only the element matched by the selector.
                locator = page.locator(screenshot_model.query_selector)
                screenshot_data = await locator.screenshot(
                    type=screenshot_model.image_type,
                )
            else:
                screenshot_data = await page.screenshot(
                    full_page=screenshot_model.full_page,
                    type=screenshot_model.image_type,
                )
            return screenshot_data
        finally:
            # Always release the page (and the dedicated context, if any),
            # even when navigation or the capture itself fails.
            try:
                await page.close()
            finally:
                if context is not None:
                    await context.close()

    async def close_instance(self) -> None:
        """For manual closing of playwright if needed.

        Closes the browser (when one was launched) and stops Playwright.
        Both attributes are reset to ``None`` first, so the call is safe to
        repeat and a later ``await instance`` starts fresh resources.
        """
        browser, playwright = self.browser, self.playwright
        self.browser = None
        self.playwright = None
        try:
            if browser is not None:
                await browser.close()
        finally:
            # Stop the driver even if closing the browser raised.
            if playwright is not None:
                await playwright.stop()