From 24a6fa88aa82ff97da814570a5bfacec9d44ebf7 Mon Sep 17 00:00:00 2001 From: Vlada Dusek Date: Wed, 27 May 2026 13:00:56 +0200 Subject: [PATCH] refactor!: adapt to apify-client v3 Bump apify-client to v3 (>=3.0.0,<4.0.0) and re-use its models, literals, and constants instead of maintaining SDK-side duplicates. - Replace SDK models with apify-client equivalents (e.g. `Run` instead of `ActorRun`) and import pricing models from the client; remove the now redundant `src/apify/_models.py`. - Drop the `apify-shared` dependency and define the few needed constants locally in `_consts.py`. - Adapt `Actor.start`/`call`/`call_task` to the v3 `run_timeout` API and tolerate platform pricing-info env var omissions. - Adapt the Apify storage clients (dataset, key-value store, request queue) to the v3 client surface. - Refresh `uv.lock` (apify-client 3.0.2, drops apify-shared, adds dnspython/email-validator). - Align unit, integration, and e2e tests with the v3 type and model changes. BREAKING CHANGE: requires apify-client v3; SDK-side model and literal duplicates are removed in favor of those exported by apify-client. Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/02_concepts/code/07_webhook.py | 7 +- .../02_concepts/code/07_webhook_preventing.py | 10 +- docs/04_upgrading/upgrading_to_v3.md | 35 +++ pyproject.toml | 7 +- src/apify/__init__.py | 9 +- src/apify/_actor.py | 153 +++++----- src/apify/_charging.py | 17 +- src/apify/_configuration.py | 49 +++- src/apify/_consts.py | 79 +++++- src/apify/_models.py | 266 ------------------ src/apify/_proxy_configuration.py | 7 +- .../_apify/_alias_resolving.py | 10 +- .../_apify/_api_client_creation.py | 10 +- .../storage_clients/_apify/_dataset_client.py | 15 +- .../_apify/_key_value_store_client.py | 23 +- src/apify/storage_clients/_apify/_models.py | 78 ++--- .../_apify/_request_queue_client.py | 38 +-- .../_apify/_request_queue_shared_client.py | 46 +-- .../_apify/_request_queue_single_client.py | 50 ++-- src/apify/storage_clients/_apify/_utils.py | 21 ++ tests/e2e/conftest.py | 85 ++++-- tests/e2e/test_actor_api_helpers.py | 91 +++--- tests/e2e/test_actor_charge.py | 51 ++-- tests/e2e/test_actor_events.py | 57 +++- tests/e2e/test_actor_lifecycle.py | 54 +++- tests/e2e/test_actor_request_queue.py | 9 +- tests/e2e/test_actor_scrapy.py | 4 +- tests/e2e/test_crawlee/conftest.py | 7 +- tests/e2e/test_fixtures.py | 2 +- tests/e2e/test_scrapy/conftest.py | 7 +- tests/integration/conftest.py | 5 +- tests/integration/test_dataset.py | 5 +- tests/integration/test_key_value_store.py | 4 +- tests/integration/test_request_queue.py | 102 ++++--- tests/unit/actor/test_actor_charge.py | 19 +- .../test_actor_create_proxy_configuration.py | 4 +- tests/unit/actor/test_actor_env_helpers.py | 82 +++++- tests/unit/actor/test_actor_helpers.py | 104 +++---- .../unit/actor/test_actor_key_value_store.py | 3 +- tests/unit/actor/test_actor_lifecycle.py | 98 ++++++- tests/unit/actor/test_charging_manager.py | 14 +- tests/unit/actor/test_configuration.py | 44 ++- tests/unit/conftest.py | 3 +- tests/unit/events/test_apify_event_manager.py | 2 +- .../storage_clients/test_apify_kvs_client.py | 71 +++-- .../test_apify_request_queue_client.py | 10 +- tests/unit/test_proxy_configuration.py | 12 +- uv.lock | 52 ++-- 48 files changed, 1115 insertions(+), 816 deletions(-) delete mode 100644 src/apify/_models.py diff --git a/docs/02_concepts/code/07_webhook.py b/docs/02_concepts/code/07_webhook.py index 3dd48b13..f2a2e828 100644 --- a/docs/02_concepts/code/07_webhook.py +++ b/docs/02_concepts/code/07_webhook.py @@ -1,14 +1,15 @@ import asyncio -from apify import Actor, Webhook, WebhookEventType +from apify import Actor, WebhookCondition, WebhookCreate async def main() -> None: async with Actor: # Create a webhook that will be triggered when the Actor run fails. - webhook = Webhook( - event_types=[WebhookEventType.ACTOR_RUN_FAILED], + webhook = WebhookCreate( + event_types=['ACTOR.RUN.FAILED'], request_url='https://example.com/run-failed', + condition=WebhookCondition(), ) # Add the webhook to the Actor. diff --git a/docs/02_concepts/code/07_webhook_preventing.py b/docs/02_concepts/code/07_webhook_preventing.py index ec2334e3..e68c704b 100644 --- a/docs/02_concepts/code/07_webhook_preventing.py +++ b/docs/02_concepts/code/07_webhook_preventing.py @@ -1,18 +1,20 @@ import asyncio -from apify import Actor, Webhook, WebhookEventType +from apify import Actor, WebhookCondition, WebhookCreate async def main() -> None: async with Actor: # Create a webhook that will be triggered when the Actor run fails. - webhook = Webhook( - event_types=[WebhookEventType.ACTOR_RUN_FAILED], + webhook = WebhookCreate( + event_types=['ACTOR.RUN.FAILED'], request_url='https://example.com/run-failed', + condition=WebhookCondition(), + idempotency_key=Actor.configuration.actor_run_id, ) # Add the webhook to the Actor. - await Actor.add_webhook(webhook, idempotency_key=Actor.configuration.actor_run_id) + await Actor.add_webhook(webhook) # Raise an error to simulate a failed run. raise RuntimeError('I am an error and I know it!') diff --git a/docs/04_upgrading/upgrading_to_v3.md b/docs/04_upgrading/upgrading_to_v3.md index 729ad68b..693cc87f 100644 --- a/docs/04_upgrading/upgrading_to_v3.md +++ b/docs/04_upgrading/upgrading_to_v3.md @@ -67,6 +67,41 @@ Some changes in the related model classes: - `stats` field in `RequestQueueMetadata` - removed as it was unused. - `RequestQueueHead` - replaced by `RequestQueueHeadWithLocks`. +## Webhook model changes + +The SDK no longer ships its own `Webhook` Pydantic model. The webhook models from `apify-client` are now used directly: + +- `apify.Webhook` is now an alias of `apify_client._models.WebhookRepresentation`. It still has the same shape used in `Actor.start`, `Actor.call`, and `Actor.call_task` (`event_types`, `request_url`, `payload_template`, `headers_template`). +- `Actor.add_webhook` now accepts an `apify.WebhookCreate` instance instead of `apify.Webhook` + separate kwargs. All fields (`ignore_ssl_errors`, `do_not_retry`, `idempotency_key`, `headers_template`, etc.) move onto the `WebhookCreate` instance. `condition` is required by the underlying type — pass `WebhookCondition()` when you do not need to scope to other resources. + +**Before (v2.x):** + +```python +from apify import Actor, Webhook + +await Actor.add_webhook( + Webhook(event_types=['ACTOR.RUN.FAILED'], request_url='https://example.com'), + idempotency_key='my-key', +) +``` + +**Now (v3.0):** + +```python +from apify import Actor, WebhookCondition, WebhookCreate + +await Actor.add_webhook( + WebhookCreate( + event_types=['ACTOR.RUN.FAILED'], + request_url='https://example.com', + condition=WebhookCondition(), + idempotency_key='my-key', + ) +) +``` + +`WebhookCondition`, `WebhookCreate`, `WebhookRepresentation`, `WebhookEventType`, and `ActorPermissionLevel` are all re-exported from `apify` for convenience. + ## Removed Actor.config property - `Actor.config` property has been removed. Use `Actor.configuration` instead. diff --git a/pyproject.toml b/pyproject.toml index 5f18cea6..29230918 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,15 +34,14 @@ keywords = [ "scraping", ] dependencies = [ - "apify-client>=2.3.0,<3.0.0", - "apify-shared>=2.0.0,<3.0.0", + "apify-client>=3.0.0,<4.0.0", "crawlee>=1.0.4,<2.0.0", "cachetools>=5.5.0", "cryptography>=42.0.0", "impit>=0.8.0", "lazy-object-proxy>=1.11.0", "more_itertools>=10.2.0", - "pydantic>=2.11.0", + "pydantic[email]>=2.11.0", "typing-extensions>=4.1.0", "websockets>=14.0", "yarl>=1.18.0", @@ -197,7 +196,7 @@ builtins-ignorelist = ["id"] [tool.ruff.lint.isort] known-local-folder = ["apify"] -known-first-party = ["apify_client", "apify_shared", "crawlee"] +known-first-party = ["apify_client", "crawlee"] [tool.ruff.lint.pylint] max-branches = 18 diff --git a/src/apify/__init__.py b/src/apify/__init__.py index f6495d55..cc45e368 100644 --- a/src/apify/__init__.py +++ b/src/apify/__init__.py @@ -1,6 +1,8 @@ from importlib import metadata -from apify_shared.consts import WebhookEventType +from apify_client._literals import ActorPermissionLevel, WebhookEventType +from apify_client._models import WebhookCondition, WebhookCreate, WebhookRepresentation +from apify_client._models import WebhookRepresentation as Webhook from crawlee import Request from crawlee.events import ( Event, @@ -14,13 +16,13 @@ from apify._actor import Actor from apify._configuration import Configuration -from apify._models import Webhook from apify._proxy_configuration import ProxyConfiguration, ProxyInfo __version__ = metadata.version('apify') __all__ = [ 'Actor', + 'ActorPermissionLevel', 'Configuration', 'Event', 'EventAbortingData', @@ -33,6 +35,9 @@ 'ProxyInfo', 'Request', 'Webhook', + 'WebhookCondition', + 'WebhookCreate', 'WebhookEventType', + 'WebhookRepresentation', '__version__', ] diff --git a/src/apify/_actor.py b/src/apify/_actor.py index 69882386..ced34d3f 100644 --- a/src/apify/_actor.py +++ b/src/apify/_actor.py @@ -13,7 +13,6 @@ from pydantic import AliasChoices from apify_client import ApifyClientAsync -from apify_shared.consts import ActorEnvVars, ActorExitCodes, ApifyEnvVars from crawlee import service_locator from crawlee.errors import ServiceConflictError from crawlee.events import ( @@ -28,9 +27,8 @@ from apify._charging import DEFAULT_DATASET_ITEM_EVENT, ChargeResult, ChargingManager, ChargingManagerImplementation from apify._configuration import Configuration -from apify._consts import EVENT_LISTENERS_TIMEOUT +from apify._consts import EVENT_LISTENERS_TIMEOUT, ActorEnvVars, ActorExitCodes, ApifyEnvVars from apify._crypto import decrypt_input_secrets, load_private_key -from apify._models import ActorRun from apify._proxy_configuration import ProxyConfiguration from apify._utils import docs_group, docs_name, ensure_context, get_system_info, is_running_in_ipython from apify.events import ApifyEventManager, EventManager, LocalEventManager @@ -46,12 +44,12 @@ from types import TracebackType from typing import Self - from apify_shared.consts import ActorPermissionLevel + from apify_client._literals import ActorPermissionLevel + from apify_client._models import Run, WebhookCreate + from apify_client._models import WebhookRepresentation as Webhook from crawlee._types import JsonSerializable from crawlee.proxy_configuration import _NewUrlFunction - from apify._models import Webhook - MainReturnType = TypeVar('MainReturnType') _ensure_context = ensure_context('_active') @@ -507,17 +505,21 @@ def new_client( (increases exponentially from this value). timeout: The socket timeout of the HTTP requests sent to the Apify API. """ - token = token or self.configuration.token - api_url = api_url or self.configuration.api_base_url - return ApifyClientAsync( - token=token, - api_url=api_url, - max_retries=max_retries, - min_delay_between_retries_millis=int(min_delay_between_retries.total_seconds() * 1000) - if min_delay_between_retries is not None - else None, - timeout_secs=int(timeout.total_seconds()) if timeout else None, - ) + kwargs: dict[str, Any] = { + 'token': token or self.configuration.token, + 'api_url': api_url or self.configuration.api_base_url, + } + + if max_retries is not None: + kwargs['max_retries'] = max_retries + + if min_delay_between_retries is not None: + kwargs['min_delay_between_retries_millis'] = int(min_delay_between_retries.total_seconds() * 1000) + + if timeout is not None: + kwargs['timeout_secs'] = int(timeout.total_seconds()) + + return ApifyClientAsync(**kwargs) @_ensure_context async def open_dataset( @@ -871,7 +873,7 @@ async def start( force_permission_level: ActorPermissionLevel | None = None, wait_for_finish: int | None = None, webhooks: list[Webhook] | None = None, - ) -> ActorRun: + ) -> Run: """Run an Actor on the Apify platform. Unlike `Actor.call`, this method just starts the run without waiting for finish. @@ -904,13 +906,6 @@ async def start( """ client = self.new_client(token=token) if token else self.apify_client - if webhooks: - serialized_webhooks = [ - hook.model_dump(by_alias=True, exclude_unset=True, exclude_defaults=True) for hook in webhooks - ] - else: - serialized_webhooks = None - if timeout in {'inherit', 'RemainingTime'}: if timeout == 'RemainingTime': warnings.warn( @@ -928,20 +923,24 @@ async def start( f'Invalid timeout {timeout!r}: expected `None`, `"inherit"`, `"RemainingTime"`, or a `timedelta`.' ) - api_result = await client.actor(actor_id).start( + actor_client = client.actor(actor_id) + run = await actor_client.start( run_input=run_input, content_type=content_type, build=build, max_total_charge_usd=max_total_charge_usd, restart_on_error=restart_on_error, memory_mbytes=memory_mbytes, - timeout_secs=int(actor_start_timeout.total_seconds()) if actor_start_timeout is not None else None, + run_timeout=actor_start_timeout, force_permission_level=force_permission_level, wait_for_finish=wait_for_finish, - webhooks=serialized_webhooks, + webhooks=webhooks, ) - return ActorRun.model_validate(api_result) + if run is None: + raise RuntimeError(f'Failed to start Actor with ID "{actor_id}".') + + return run @_ensure_context async def abort( @@ -951,7 +950,7 @@ async def abort( token: str | None = None, status_message: str | None = None, gracefully: bool | None = None, - ) -> ActorRun: + ) -> Run: """Abort given Actor run on the Apify platform using the current user account. The user account is determined by the `APIFY_TOKEN` environment variable. @@ -968,13 +967,17 @@ async def abort( Info about the aborted Actor run. """ client = self.new_client(token=token) if token else self.apify_client + run_client = client.run(run_id) if status_message: - await client.run(run_id).update(status_message=status_message) + await run_client.update(status_message=status_message) - api_result = await client.run(run_id).abort(gracefully=gracefully) + run = await run_client.abort(gracefully=gracefully) - return ActorRun.model_validate(api_result) + if run is None: + raise RuntimeError(f'Failed to abort Actor run with ID "{run_id}".') + + return run @_ensure_context async def call( @@ -993,7 +996,7 @@ async def call( webhooks: list[Webhook] | None = None, wait: timedelta | None = None, logger: logging.Logger | None | Literal['default'] = 'default', - ) -> ActorRun | None: + ) -> Run | None: """Start an Actor on the Apify Platform and wait for it to finish before returning. It waits indefinitely, unless the wait argument is provided. @@ -1029,13 +1032,6 @@ async def call( """ client = self.new_client(token=token) if token else self.apify_client - if webhooks: - serialized_webhooks = [ - hook.model_dump(by_alias=True, exclude_unset=True, exclude_defaults=True) for hook in webhooks - ] - else: - serialized_webhooks = None - if timeout in {'inherit', 'RemainingTime'}: if timeout == 'RemainingTime': warnings.warn( @@ -1054,21 +1050,25 @@ async def call( f'Invalid timeout {timeout!r}: expected `None`, `"inherit"`, `"RemainingTime"`, or a `timedelta`.' ) - api_result = await client.actor(actor_id).call( + actor_client = client.actor(actor_id) + run = await actor_client.call( run_input=run_input, content_type=content_type, build=build, max_total_charge_usd=max_total_charge_usd, restart_on_error=restart_on_error, memory_mbytes=memory_mbytes, - timeout_secs=int(actor_call_timeout.total_seconds()) if actor_call_timeout is not None else None, + run_timeout=actor_call_timeout, force_permission_level=force_permission_level, - webhooks=serialized_webhooks, - wait_secs=int(wait.total_seconds()) if wait is not None else None, + webhooks=webhooks, + wait_duration=wait, logger=logger, ) - return ActorRun.model_validate(api_result) + if run is None: + raise RuntimeError(f'Failed to call Actor with ID "{actor_id}".') + + return run @_ensure_context async def call_task( @@ -1083,7 +1083,7 @@ async def call_task( webhooks: list[Webhook] | None = None, wait: timedelta | None = None, token: str | None = None, - ) -> ActorRun | None: + ) -> Run | None: """Start an Actor task on the Apify Platform and wait for it to finish before returning. It waits indefinitely, unless the wait argument is provided. @@ -1116,13 +1116,6 @@ async def call_task( """ client = self.new_client(token=token) if token else self.apify_client - if webhooks: - serialized_webhooks = [ - hook.model_dump(by_alias=True, exclude_unset=True, exclude_defaults=True) for hook in webhooks - ] - else: - serialized_webhooks = None - if timeout == 'inherit': task_call_timeout = self._get_remaining_time() elif timeout is None: @@ -1132,17 +1125,21 @@ async def call_task( else: raise ValueError(f'Invalid timeout {timeout!r}: expected `None`, `"inherit"`, or a `timedelta`.') - api_result = await client.task(task_id).call( + task_client = client.task(task_id) + run = await task_client.call( task_input=task_input, build=build, restart_on_error=restart_on_error, memory_mbytes=memory_mbytes, - timeout_secs=int(task_call_timeout.total_seconds()) if task_call_timeout is not None else None, - webhooks=serialized_webhooks, - wait_secs=int(wait.total_seconds()) if wait is not None else None, + run_timeout=task_call_timeout, + webhooks=webhooks, + wait_duration=wait, ) - return ActorRun.model_validate(api_result) + if run is None: + raise RuntimeError(f'Failed to call Task with ID "{task_id}".') + + return run @_ensure_context async def metamorph( @@ -1257,14 +1254,7 @@ async def reboot( await asyncio.sleep(custom_after_sleep.total_seconds()) @_ensure_context - async def add_webhook( - self, - webhook: Webhook, - *, - ignore_ssl_errors: bool | None = None, - do_not_retry: bool | None = None, - idempotency_key: str | None = None, - ) -> None: + async def add_webhook(self, webhook: WebhookCreate) -> None: """Create an ad-hoc webhook for the current Actor run. This webhook lets you receive a notification when the Actor run finished or failed. @@ -1275,11 +1265,7 @@ async def add_webhook( For more information about Apify Actor webhooks, please see the [documentation](https://docs.apify.com/webhooks). Args: - webhook: The webhook to be added - ignore_ssl_errors: Whether the webhook should ignore SSL errors returned by request_url - do_not_retry: Whether the webhook should retry sending the payload to request_url upon failure. - idempotency_key: A unique identifier of a webhook. You can use it to ensure that you won't create - the same webhook multiple times. + webhook: The webhook to be added. The `condition.actor_run_id` is automatically set to the current run. Returns: The created webhook. @@ -1297,9 +1283,11 @@ async def add_webhook( event_types=webhook.event_types, request_url=webhook.request_url, payload_template=webhook.payload_template, - ignore_ssl_errors=ignore_ssl_errors, - do_not_retry=do_not_retry, - idempotency_key=idempotency_key, + headers_template=webhook.headers_template, + ignore_ssl_errors=webhook.ignore_ssl_errors, + do_not_retry=webhook.do_not_retry, + idempotency_key=webhook.idempotency_key, + is_ad_hoc=webhook.is_ad_hoc if webhook.is_ad_hoc is not None else True, ) @_ensure_context @@ -1308,7 +1296,7 @@ async def set_status_message( status_message: str, *, is_terminal: bool | None = None, - ) -> ActorRun | None: + ) -> Run | None: """Set the status message for the current Actor run. Args: @@ -1327,11 +1315,18 @@ async def set_status_message( if not self.configuration.actor_run_id: raise RuntimeError('actor_run_id cannot be None when running on the Apify platform.') - api_result = await self.apify_client.run(self.configuration.actor_run_id).update( - status_message=status_message, is_status_message_terminal=is_terminal + run_client = self.apify_client.run(self.configuration.actor_run_id) + run = await run_client.update( + status_message=status_message, + is_status_message_terminal=is_terminal, ) - return ActorRun.model_validate(api_result) + if run is None: + raise RuntimeError( + f'Failed to set status message for Actor run with ID "{self.configuration.actor_run_id}".' + ) + + return run @_ensure_context async def create_proxy_configuration( diff --git a/src/apify/_charging.py b/src/apify/_charging.py index 1ba09e14..614f3c99 100644 --- a/src/apify/_charging.py +++ b/src/apify/_charging.py @@ -5,18 +5,15 @@ from dataclasses import dataclass from datetime import UTC, datetime from decimal import Decimal -from typing import TYPE_CHECKING, Protocol, TypedDict +from typing import TYPE_CHECKING, Literal, Protocol, TypedDict -from pydantic import TypeAdapter - -from apify._models import ( - ActorRun, +from apify_client._models import ( FlatPricePerMonthActorPricingInfo, FreeActorPricingInfo, PayPerEventActorPricingInfo, PricePerDatasetItemActorPricingInfo, - PricingModel, ) + from apify._utils import ReentrantLock, docs_group, ensure_context from apify.log import logger from apify.storages import Dataset @@ -28,7 +25,8 @@ from apify._configuration import Configuration -run_validator = TypeAdapter[ActorRun | None](ActorRun | None) +PricingModel = Literal['PAY_PER_EVENT', 'PRICE_PER_DATASET_ITEM', 'FLAT_PRICE_PER_MONTH', 'FREE'] +"""Pricing model for an Actor.""" DEFAULT_DATASET_ITEM_EVENT = 'apify-default-dataset-item' @@ -308,7 +306,7 @@ async def charge(self, event_name: str, count: int = 1) -> ChargeResult: # the platform handles them automatically based on dataset writes. pass elif event_name in self._pricing_info: - await self._client.run(self._actor_run_id).charge(event_name, charged_count) + await self._client.run(self._actor_run_id).charge(event_name, count=charged_count) else: logger.warning(f"Attempting to charge for an unknown event '{event_name}'") @@ -427,7 +425,8 @@ async def _fetch_pricing_info(self) -> _FetchedPricingInfoDict: if self._actor_run_id is None: raise RuntimeError('Actor run ID not found even though the Actor is running on Apify') - run = run_validator.validate_python(await self._client.run(self._actor_run_id).get()) + run = await self._client.run(self._actor_run_id).get() + if run is None: raise RuntimeError('Actor run not found') diff --git a/src/apify/_configuration.py b/src/apify/_configuration.py index 97dc2d05..b421185a 100644 --- a/src/apify/_configuration.py +++ b/src/apify/_configuration.py @@ -10,17 +10,17 @@ from pydantic import AliasChoices, BeforeValidator, Field, model_validator from typing_extensions import TypedDict, deprecated -from crawlee import service_locator -from crawlee._utils.models import timedelta_ms -from crawlee._utils.urls import validate_http_url -from crawlee.configuration import Configuration as CrawleeConfiguration - -from apify._models import ( +from apify_client._models import ( FlatPricePerMonthActorPricingInfo, FreeActorPricingInfo, PayPerEventActorPricingInfo, PricePerDatasetItemActorPricingInfo, ) +from crawlee import service_locator +from crawlee._utils.models import timedelta_ms +from crawlee._utils.urls import validate_http_url +from crawlee.configuration import Configuration as CrawleeConfiguration + from apify._utils import docs_group logger = getLogger(__name__) @@ -71,6 +71,41 @@ def _load_storage_keys(data: None | str | ActorStorages) -> ActorStorages | None } +def _normalize_actor_pricing_info(data: Any) -> Any: + """Parse and normalize the `APIFY_ACTOR_PRICING_INFO` env var for the apify-client pricing models. + + The platform-provided env var omits some fields that are required by the apify-client pydantic models + (`apifyMarginPercentage`, `createdAt`, `startedAt`, and per-event `eventDescription`). Inject safe + defaults for those so validation succeeds on the Actor side. Treat an empty env value or an empty/ + discriminator-less JSON object as "no pricing info" to match the platform's behavior for Actors + without a configured pricing model. + """ + if data is None or data == '': + return None + pricing_info = json.loads(data) if isinstance(data, str) else data + if not isinstance(pricing_info, dict): + # Already a parsed pydantic model (or some other non-dict) - pass through for pydantic to validate. + return pricing_info + if not pricing_info.get('pricingModel'): + # Platform sets `APIFY_ACTOR_PRICING_INFO={}` for Actors without a configured pricing model; + # without the discriminator, treat it as absent rather than letting the pydantic union fail. + return None + + pricing_info.setdefault('apifyMarginPercentage', 0.0) + pricing_info.setdefault('createdAt', '1970-01-01T00:00:00.000Z') + pricing_info.setdefault('startedAt', '1970-01-01T00:00:00.000Z') + + pricing_per_event = pricing_info.get('pricingPerEvent') + if isinstance(pricing_per_event, dict): + actor_charge_events = pricing_per_event.get('actorChargeEvents') + if isinstance(actor_charge_events, dict): + for event in actor_charge_events.values(): + if isinstance(event, dict): + event.setdefault('eventDescription', '') + + return pricing_info + + @docs_group('Configuration') class Configuration(CrawleeConfiguration): """A class for specifying the configuration of an Actor. @@ -471,7 +506,7 @@ class Configuration(CrawleeConfiguration): description='JSON string with prising info of the actor', discriminator='pricing_model', ), - BeforeValidator(lambda data: json.loads(data) if isinstance(data, str) else data or None), + BeforeValidator(_normalize_actor_pricing_info), ] = None charged_event_counts: Annotated[ diff --git a/src/apify/_consts.py b/src/apify/_consts.py index 9bef9cdb..c3874e27 100644 --- a/src/apify/_consts.py +++ b/src/apify/_consts.py @@ -2,12 +2,87 @@ import re from datetime import timedelta +from enum import Enum, StrEnum EVENT_LISTENERS_TIMEOUT = timedelta(seconds=5) +"""Timeout for waiting on event listeners to finish during Actor exit.""" -BASE64_REGEXP = '[-A-Za-z0-9+/]*={0,3}' ENCRYPTED_STRING_VALUE_PREFIX = 'ENCRYPTED_VALUE' +"""Prefix for encrypted string values in Actor input.""" + ENCRYPTED_JSON_VALUE_PREFIX = 'ENCRYPTED_JSON' +"""Prefix for encrypted JSON values in Actor input.""" + ENCRYPTED_INPUT_VALUE_REGEXP = re.compile( - f'^({ENCRYPTED_STRING_VALUE_PREFIX}|{ENCRYPTED_JSON_VALUE_PREFIX}):(?:({BASE64_REGEXP}):)?({BASE64_REGEXP}):({BASE64_REGEXP})$' + r'^(ENCRYPTED_VALUE|ENCRYPTED_JSON):(?:([-A-Za-z0-9+/]*={0,3}):)?([-A-Za-z0-9+/]*={0,3}):([-A-Za-z0-9+/]*={0,3})$' ) +"""Regex matching encrypted input values with base64-encoded components.""" + + +class ActorEnvVars(StrEnum): + """Environment variables with ACTOR_ prefix set by the Apify platform.""" + + BUILD_ID = 'ACTOR_BUILD_ID' + BUILD_NUMBER = 'ACTOR_BUILD_NUMBER' + BUILD_TAGS = 'ACTOR_BUILD_TAGS' + DEFAULT_DATASET_ID = 'ACTOR_DEFAULT_DATASET_ID' + DEFAULT_KEY_VALUE_STORE_ID = 'ACTOR_DEFAULT_KEY_VALUE_STORE_ID' + DEFAULT_REQUEST_QUEUE_ID = 'ACTOR_DEFAULT_REQUEST_QUEUE_ID' + EVENTS_WEBSOCKET_URL = 'ACTOR_EVENTS_WEBSOCKET_URL' + FULL_NAME = 'ACTOR_FULL_NAME' + ID = 'ACTOR_ID' + INPUT_KEY = 'ACTOR_INPUT_KEY' + MAX_PAID_DATASET_ITEMS = 'ACTOR_MAX_PAID_DATASET_ITEMS' + MAX_TOTAL_CHARGE_USD = 'ACTOR_MAX_TOTAL_CHARGE_USD' + MEMORY_MBYTES = 'ACTOR_MEMORY_MBYTES' + PERMISSION_LEVEL = 'ACTOR_PERMISSION_LEVEL' + RUN_ID = 'ACTOR_RUN_ID' + STANDBY_PORT = 'ACTOR_STANDBY_PORT' + STANDBY_URL = 'ACTOR_STANDBY_URL' + STARTED_AT = 'ACTOR_STARTED_AT' + TASK_ID = 'ACTOR_TASK_ID' + TIMEOUT_AT = 'ACTOR_TIMEOUT_AT' + WEB_SERVER_PORT = 'ACTOR_WEB_SERVER_PORT' + WEB_SERVER_URL = 'ACTOR_WEB_SERVER_URL' + + +class ApifyEnvVars(StrEnum): + """Environment variables with APIFY_ prefix set by the Apify platform.""" + + API_BASE_URL = 'APIFY_API_BASE_URL' + API_PUBLIC_BASE_URL = 'APIFY_API_PUBLIC_BASE_URL' + DEDICATED_CPUS = 'APIFY_DEDICATED_CPUS' + DEFAULT_BROWSER_PATH = 'APIFY_DEFAULT_BROWSER_PATH' + DISABLE_BROWSER_SANDBOX = 'APIFY_DISABLE_BROWSER_SANDBOX' + DISABLE_OUTDATED_WARNING = 'APIFY_DISABLE_OUTDATED_WARNING' + FACT = 'APIFY_FACT' + HEADLESS = 'APIFY_HEADLESS' + INPUT_SECRETS_PRIVATE_KEY_FILE = 'APIFY_INPUT_SECRETS_PRIVATE_KEY_FILE' + INPUT_SECRETS_PRIVATE_KEY_PASSPHRASE = 'APIFY_INPUT_SECRETS_PRIVATE_KEY_PASSPHRASE' + IS_AT_HOME = 'APIFY_IS_AT_HOME' + LOCAL_STORAGE_DIR = 'APIFY_LOCAL_STORAGE_DIR' + LOG_FORMAT = 'APIFY_LOG_FORMAT' + LOG_LEVEL = 'APIFY_LOG_LEVEL' + MAX_USED_CPU_RATIO = 'APIFY_MAX_USED_CPU_RATIO' + META_ORIGIN = 'APIFY_META_ORIGIN' + METAMORPH_AFTER_SLEEP_MILLIS = 'APIFY_METAMORPH_AFTER_SLEEP_MILLIS' + PERSIST_STATE_INTERVAL_MILLIS = 'APIFY_PERSIST_STATE_INTERVAL_MILLIS' + PERSIST_STORAGE = 'APIFY_PERSIST_STORAGE' + PROXY_HOSTNAME = 'APIFY_PROXY_HOSTNAME' + PROXY_PASSWORD = 'APIFY_PROXY_PASSWORD' + PROXY_PORT = 'APIFY_PROXY_PORT' + PROXY_STATUS_URL = 'APIFY_PROXY_STATUS_URL' + PURGE_ON_START = 'APIFY_PURGE_ON_START' + SDK_LATEST_VERSION = 'APIFY_SDK_LATEST_VERSION' + SYSTEM_INFO_INTERVAL_MILLIS = 'APIFY_SYSTEM_INFO_INTERVAL_MILLIS' + TOKEN = 'APIFY_TOKEN' + USER_ID = 'APIFY_USER_ID' + USER_IS_PAYING = 'APIFY_USER_IS_PAYING' + WORKFLOW_KEY = 'APIFY_WORKFLOW_KEY' + + +class ActorExitCodes(int, Enum): + """Standard exit codes used by Actors to indicate run completion status.""" + + SUCCESS = 0 + ERROR_USER_FUNCTION_THREW = 91 diff --git a/src/apify/_models.py b/src/apify/_models.py deleted file mode 100644 index 357417ec..00000000 --- a/src/apify/_models.py +++ /dev/null @@ -1,266 +0,0 @@ -from __future__ import annotations - -from datetime import datetime -from typing import Annotated, Literal - -from pydantic import BaseModel, BeforeValidator, ConfigDict, Field - -from apify_shared.consts import ActorJobStatus, MetaOrigin, WebhookEventType -from crawlee._utils.urls import validate_http_url - -from apify._utils import docs_group - -PricingModel = Literal['PAY_PER_EVENT', 'PRICE_PER_DATASET_ITEM', 'FLAT_PRICE_PER_MONTH', 'FREE'] -"""Pricing model for an Actor.""" - -GeneralAccess = Literal['ANYONE_WITH_ID_CAN_READ', 'ANYONE_WITH_NAME_CAN_READ', 'FOLLOW_USER_SETTING', 'RESTRICTED'] -"""Defines the general access level for the resource.""" - - -class WebhookCondition(BaseModel): - """Condition for triggering a webhook.""" - - model_config = ConfigDict(populate_by_name=True, extra='allow') - - actor_id: Annotated[str | None, Field(alias='actorId')] = None - actor_task_id: Annotated[str | None, Field(alias='actorTaskId')] = None - actor_run_id: Annotated[str | None, Field(alias='actorRunId')] = None - - -WebhookDispatchStatus = Literal['ACTIVE', 'SUCCEEDED', 'FAILED'] -"""Status of a webhook dispatch.""" - - -class ExampleWebhookDispatch(BaseModel): - """Information about a webhook dispatch.""" - - model_config = ConfigDict(populate_by_name=True, extra='allow') - - status: WebhookDispatchStatus - finished_at: Annotated[datetime, Field(alias='finishedAt')] - - -class WebhookStats(BaseModel): - """Statistics about webhook dispatches.""" - - model_config = ConfigDict(populate_by_name=True, extra='allow') - - total_dispatches: Annotated[int, Field(alias='totalDispatches')] - - -@docs_group('Actor') -class Webhook(BaseModel): - model_config = ConfigDict(populate_by_name=True, extra='allow') - - event_types: Annotated[ - list[WebhookEventType], - Field(alias='eventTypes', description='Event types that should trigger the webhook'), - ] - request_url: Annotated[ - str, - Field(alias='requestUrl', description='URL that the webhook should call'), - BeforeValidator(validate_http_url), - ] - id: Annotated[str | None, Field(alias='id')] = None - created_at: Annotated[datetime | None, Field(alias='createdAt')] = None - modified_at: Annotated[datetime | None, Field(alias='modifiedAt')] = None - user_id: Annotated[str | None, Field(alias='userId')] = None - is_ad_hoc: Annotated[bool | None, Field(alias='isAdHoc')] = None - should_interpolate_strings: Annotated[bool | None, Field(alias='shouldInterpolateStrings')] = None - condition: Annotated[WebhookCondition | None, Field(alias='condition')] = None - ignore_ssl_errors: Annotated[bool | None, Field(alias='ignoreSslErrors')] = None - do_not_retry: Annotated[bool | None, Field(alias='doNotRetry')] = None - payload_template: Annotated[ - str | None, - Field(alias='payloadTemplate', description='Template for the payload sent by the webhook'), - ] = None - headers_template: Annotated[str | None, Field(alias='headersTemplate')] = None - description: Annotated[str | None, Field(alias='description')] = None - last_dispatch: Annotated[ExampleWebhookDispatch | None, Field(alias='lastDispatch')] = None - stats: Annotated[WebhookStats | None, Field(alias='stats')] = None - - -@docs_group('Actor') -class ActorRunMeta(BaseModel): - model_config = ConfigDict(populate_by_name=True, extra='allow') - - origin: Annotated[MetaOrigin, Field()] - client_ip: Annotated[str | None, Field(alias='clientIp')] = None - user_agent: Annotated[str | None, Field(alias='userAgent')] = None - schedule_id: Annotated[str | None, Field(alias='scheduleId')] = None - scheduled_at: Annotated[datetime | None, Field(alias='scheduledAt')] = None - - -@docs_group('Actor') -class ActorRunStats(BaseModel): - model_config = ConfigDict(populate_by_name=True, extra='allow') - - input_body_len: Annotated[int | None, Field(alias='inputBodyLen')] = None - migration_count: Annotated[int | None, Field(alias='migrationCount')] = None - reboot_count: Annotated[int | None, Field(alias='rebootCount')] = None - restart_count: Annotated[int, Field(alias='restartCount')] - resurrect_count: Annotated[int, Field(alias='resurrectCount')] - mem_avg_bytes: Annotated[float | None, Field(alias='memAvgBytes')] = None - mem_max_bytes: Annotated[int | None, Field(alias='memMaxBytes')] = None - mem_current_bytes: Annotated[int | None, Field(alias='memCurrentBytes')] = None - cpu_avg_usage: Annotated[float | None, Field(alias='cpuAvgUsage')] = None - cpu_max_usage: Annotated[float | None, Field(alias='cpuMaxUsage')] = None - cpu_current_usage: Annotated[float | None, Field(alias='cpuCurrentUsage')] = None - net_rx_bytes: Annotated[int | None, Field(alias='netRxBytes')] = None - net_tx_bytes: Annotated[int | None, Field(alias='netTxBytes')] = None - duration_millis: Annotated[int | None, Field(alias='durationMillis')] = None - run_time_secs: Annotated[float | None, Field(alias='runTimeSecs')] = None - metamorph: Annotated[int | None, Field(alias='metamorph')] = None - compute_units: Annotated[float, Field(alias='computeUnits')] - - -@docs_group('Actor') -class ActorRunOptions(BaseModel): - model_config = ConfigDict(populate_by_name=True, extra='allow') - - build: str - timeout_secs: Annotated[int, Field(alias='timeoutSecs')] - memory_mbytes: Annotated[int, Field(alias='memoryMbytes')] - disk_mbytes: Annotated[int, Field(alias='diskMbytes')] - max_items: Annotated[int | None, Field(alias='maxItems')] = None - max_total_charge_usd: Annotated[float | None, Field(alias='maxTotalChargeUsd')] = None - - -@docs_group('Actor') -class ActorRunUsage(BaseModel): - model_config = ConfigDict(populate_by_name=True, extra='allow') - - actor_compute_units: Annotated[float | None, Field(alias='ACTOR_COMPUTE_UNITS')] = None - dataset_reads: Annotated[int | None, Field(alias='DATASET_READS')] = None - dataset_writes: Annotated[int | None, Field(alias='DATASET_WRITES')] = None - key_value_store_reads: Annotated[int | None, Field(alias='KEY_VALUE_STORE_READS')] = None - key_value_store_writes: Annotated[int | None, Field(alias='KEY_VALUE_STORE_WRITES')] = None - key_value_store_lists: Annotated[int | None, Field(alias='KEY_VALUE_STORE_LISTS')] = None - request_queue_reads: Annotated[int | None, Field(alias='REQUEST_QUEUE_READS')] = None - request_queue_writes: Annotated[int | None, Field(alias='REQUEST_QUEUE_WRITES')] = None - data_transfer_internal_gbytes: Annotated[float | None, Field(alias='DATA_TRANSFER_INTERNAL_GBYTES')] = None - data_transfer_external_gbytes: Annotated[float | None, Field(alias='DATA_TRANSFER_EXTERNAL_GBYTES')] = None - proxy_residential_transfer_gbytes: Annotated[float | None, Field(alias='PROXY_RESIDENTIAL_TRANSFER_GBYTES')] = None - proxy_serps: Annotated[int | None, Field(alias='PROXY_SERPS')] = None - - -@docs_group('Actor') -class ActorRunUsageUsd(BaseModel): - """Resource usage costs in USD.""" - - model_config = ConfigDict(populate_by_name=True, extra='allow') - - actor_compute_units: Annotated[float | None, Field(alias='ACTOR_COMPUTE_UNITS')] = None - dataset_reads: Annotated[float | None, Field(alias='DATASET_READS')] = None - dataset_writes: Annotated[float | None, Field(alias='DATASET_WRITES')] = None - key_value_store_reads: Annotated[float | None, Field(alias='KEY_VALUE_STORE_READS')] = None - key_value_store_writes: Annotated[float | None, Field(alias='KEY_VALUE_STORE_WRITES')] = None - key_value_store_lists: Annotated[float | None, Field(alias='KEY_VALUE_STORE_LISTS')] = None - request_queue_reads: Annotated[float | None, Field(alias='REQUEST_QUEUE_READS')] = None - request_queue_writes: Annotated[float | None, Field(alias='REQUEST_QUEUE_WRITES')] = None - data_transfer_internal_gbytes: Annotated[float | None, Field(alias='DATA_TRANSFER_INTERNAL_GBYTES')] = None - data_transfer_external_gbytes: Annotated[float | None, Field(alias='DATA_TRANSFER_EXTERNAL_GBYTES')] = None - proxy_residential_transfer_gbytes: Annotated[float | None, Field(alias='PROXY_RESIDENTIAL_TRANSFER_GBYTES')] = None - proxy_serps: Annotated[float | None, Field(alias='PROXY_SERPS')] = None - - -class Metamorph(BaseModel): - """Information about a metamorph event that occurred during the run.""" - - model_config = ConfigDict(populate_by_name=True, extra='allow') - - created_at: Annotated[datetime, Field(alias='createdAt')] - actor_id: Annotated[str, Field(alias='actorId')] - build_id: Annotated[str, Field(alias='buildId')] - input_key: Annotated[str | None, Field(alias='inputKey')] = None - - -class CommonActorPricingInfo(BaseModel): - model_config = ConfigDict(populate_by_name=True, extra='allow') - - apify_margin_percentage: Annotated[float | None, Field(alias='apifyMarginPercentage')] = None - created_at: Annotated[datetime | None, Field(alias='createdAt')] = None - started_at: Annotated[datetime | None, Field(alias='startedAt')] = None - notified_about_future_change_at: Annotated[datetime | None, Field(alias='notifiedAboutFutureChangeAt')] = None - notified_about_change_at: Annotated[datetime | None, Field(alias='notifiedAboutChangeAt')] = None - reason_for_change: Annotated[str | None, Field(alias='reasonForChange')] = None - - -class FreeActorPricingInfo(CommonActorPricingInfo): - pricing_model: Annotated[Literal['FREE'], Field(alias='pricingModel')] - - -class FlatPricePerMonthActorPricingInfo(CommonActorPricingInfo): - pricing_model: Annotated[Literal['FLAT_PRICE_PER_MONTH'], Field(alias='pricingModel')] - trial_minutes: Annotated[int, Field(alias='trialMinutes')] - price_per_unit_usd: Annotated[float, Field(alias='pricePerUnitUsd')] - - -class PricePerDatasetItemActorPricingInfo(CommonActorPricingInfo): - pricing_model: Annotated[Literal['PRICE_PER_DATASET_ITEM'], Field(alias='pricingModel')] - unit_name: Annotated[str, Field(alias='unitName')] - price_per_unit_usd: Annotated[float, Field(alias='pricePerUnitUsd')] - - -class ActorChargeEvent(BaseModel): - model_config = ConfigDict(populate_by_name=True, extra='allow') - - event_price_usd: Annotated[float, Field(alias='eventPriceUsd')] - event_title: Annotated[str, Field(alias='eventTitle')] - event_description: Annotated[str | None, Field(alias='eventDescription')] = None - - -class PricingPerEvent(BaseModel): - model_config = ConfigDict(populate_by_name=True, extra='allow') - - actor_charge_events: Annotated[dict[str, ActorChargeEvent] | None, Field(alias='actorChargeEvents')] = None - - -class PayPerEventActorPricingInfo(CommonActorPricingInfo): - pricing_model: Annotated[Literal['PAY_PER_EVENT'], Field(alias='pricingModel')] - pricing_per_event: Annotated[PricingPerEvent, Field(alias='pricingPerEvent')] - minimal_max_total_charge_usd: Annotated[float | None, Field(alias='minimalMaxTotalChargeUsd')] = None - - -@docs_group('Actor') -class ActorRun(BaseModel): - model_config = ConfigDict(populate_by_name=True, extra='allow') - - id: Annotated[str, Field(alias='id')] - act_id: Annotated[str, Field(alias='actId')] - user_id: Annotated[str, Field(alias='userId')] - actor_task_id: Annotated[str | None, Field(alias='actorTaskId')] = None - started_at: Annotated[datetime, Field(alias='startedAt')] - finished_at: Annotated[datetime | None, Field(alias='finishedAt')] = None - status: Annotated[ActorJobStatus, Field(alias='status')] - status_message: Annotated[str | None, Field(alias='statusMessage')] = None - is_status_message_terminal: Annotated[bool | None, Field(alias='isStatusMessageTerminal')] = None - meta: Annotated[ActorRunMeta, Field(alias='meta')] - stats: Annotated[ActorRunStats, Field(alias='stats')] - options: Annotated[ActorRunOptions, Field(alias='options')] - build_id: Annotated[str, Field(alias='buildId')] - exit_code: Annotated[int | None, Field(alias='exitCode')] = None - general_access: Annotated[str | None, Field(alias='generalAccess')] = None - default_key_value_store_id: Annotated[str, Field(alias='defaultKeyValueStoreId')] - default_dataset_id: Annotated[str, Field(alias='defaultDatasetId')] - default_request_queue_id: Annotated[str, Field(alias='defaultRequestQueueId')] - build_number: Annotated[str | None, Field(alias='buildNumber')] = None - container_url: Annotated[str | None, Field(alias='containerUrl')] = None - is_container_server_ready: Annotated[bool | None, Field(alias='isContainerServerReady')] = None - git_branch_name: Annotated[str | None, Field(alias='gitBranchName')] = None - usage: Annotated[ActorRunUsage | None, Field(alias='usage')] = None - usage_total_usd: Annotated[float | None, Field(alias='usageTotalUsd')] = None - usage_usd: Annotated[ActorRunUsageUsd | None, Field(alias='usageUsd')] = None - pricing_info: Annotated[ - FreeActorPricingInfo - | FlatPricePerMonthActorPricingInfo - | PricePerDatasetItemActorPricingInfo - | PayPerEventActorPricingInfo - | None, - Field(alias='pricingInfo', discriminator='pricing_model'), - ] = None - charged_event_counts: Annotated[ - dict[str, int] | None, - Field(alias='chargedEventCounts'), - ] = None - metamorphs: Annotated[list[Metamorph] | None, Field(alias='metamorphs')] = None diff --git a/src/apify/_proxy_configuration.py b/src/apify/_proxy_configuration.py index 10cf277f..453f2331 100644 --- a/src/apify/_proxy_configuration.py +++ b/src/apify/_proxy_configuration.py @@ -11,12 +11,12 @@ import impit from yarl import URL -from apify_shared.consts import ApifyEnvVars from crawlee.proxy_configuration import ProxyConfiguration as CrawleeProxyConfiguration from crawlee.proxy_configuration import ProxyInfo as CrawleeProxyInfo from crawlee.proxy_configuration import _NewUrlFunction from apify._configuration import Configuration +from apify._consts import ApifyEnvVars from apify._utils import docs_group from apify.log import logger @@ -286,9 +286,8 @@ async def _maybe_fetch_password(self) -> None: if token and self._apify_client: user_info = await self._apify_client.user().get() - if user_info: - password = user_info['proxy']['password'] - self._password = password + if user_info and (proxy := getattr(user_info, 'proxy', None)): + self._password = proxy.password async def _check_access(self) -> None: proxy_status_url = f'{self._configuration.proxy_status_url}/?format=json' diff --git a/src/apify/storage_clients/_apify/_alias_resolving.py b/src/apify/storage_clients/_apify/_alias_resolving.py index abca78fe..50234a1a 100644 --- a/src/apify/storage_clients/_apify/_alias_resolving.py +++ b/src/apify/storage_clients/_apify/_alias_resolving.py @@ -2,6 +2,7 @@ import logging from asyncio import Lock +from datetime import timedelta from functools import cached_property from logging import getLogger from typing import TYPE_CHECKING, ClassVar, Literal, overload @@ -14,7 +15,7 @@ from collections.abc import Callable from types import TracebackType - from apify_client.clients import ( + from apify_client._resource_clients import ( DatasetClientAsync, DatasetCollectionClientAsync, KeyValueStoreClientAsync, @@ -107,8 +108,8 @@ async def open_by_alias( # Create new unnamed storage and store alias mapping raw_metadata = await collection_client.get_or_create() - await alias_resolver.store_mapping(storage_id=raw_metadata['id']) - return get_resource_client_by_id(raw_metadata['id']) + await alias_resolver.store_mapping(storage_id=raw_metadata.id) + return get_resource_client_by_id(raw_metadata.id) class AliasResolver: @@ -257,8 +258,7 @@ async def _get_default_kvs_client(configuration: Configuration) -> KeyValueStore token=configuration.token, api_url=configuration.api_base_url, max_retries=8, - min_delay_between_retries_millis=500, - timeout_secs=360, + min_delay_between_retries=timedelta(milliseconds=500), ) if not configuration.default_key_value_store_id: diff --git a/src/apify/storage_clients/_apify/_api_client_creation.py b/src/apify/storage_clients/_apify/_api_client_creation.py index 542a203f..75e95c60 100644 --- a/src/apify/storage_clients/_apify/_api_client_creation.py +++ b/src/apify/storage_clients/_apify/_api_client_creation.py @@ -1,5 +1,6 @@ from __future__ import annotations +from datetime import timedelta from typing import TYPE_CHECKING, Literal, overload from apify_client import ApifyClientAsync @@ -8,7 +9,7 @@ from apify.storage_clients._apify._alias_resolving import AliasResolver, open_by_alias if TYPE_CHECKING: - from apify_client.clients import DatasetClientAsync, KeyValueStoreClientAsync, RequestQueueClientAsync + from apify_client._resource_clients import DatasetClientAsync, KeyValueStoreClientAsync, RequestQueueClientAsync from apify._configuration import Configuration @@ -137,13 +138,13 @@ def get_resource_client(storage_id: str) -> DatasetClientAsync: # Default storage does not exist. Create a new one. if not raw_metadata: raw_metadata = await collection_client.get_or_create() - resource_client = get_resource_client(raw_metadata['id']) + resource_client = get_resource_client(raw_metadata.id) return resource_client # Open by name. case (None, str(), None, _): raw_metadata = await collection_client.get_or_create(name=name) - return get_resource_client(raw_metadata['id']) + return get_resource_client(raw_metadata.id) # Open by ID. case (None, None, str(), _): @@ -177,6 +178,5 @@ def _create_api_client(configuration: Configuration) -> ApifyClientAsync: api_url=configuration.api_base_url, api_public_url=configuration.api_public_base_url, max_retries=8, - min_delay_between_retries_millis=500, - timeout_secs=360, + min_delay_between_retries=timedelta(milliseconds=500), ) diff --git a/src/apify/storage_clients/_apify/_dataset_client.py b/src/apify/storage_clients/_apify/_dataset_client.py index fc809646..d8c27792 100644 --- a/src/apify/storage_clients/_apify/_dataset_client.py +++ b/src/apify/storage_clients/_apify/_dataset_client.py @@ -18,7 +18,7 @@ if TYPE_CHECKING: from collections.abc import AsyncIterator, Mapping, Sequence - from apify_client.clients import DatasetClientAsync + from apify_client._resource_clients import DatasetClientAsync from crawlee._types import JsonSerializable from apify import Configuration @@ -69,7 +69,18 @@ def __init__( @override async def get_metadata(self) -> DatasetMetadata: metadata = await self._api_client.get() - return DatasetMetadata.model_validate(metadata) + + if metadata is None: + raise ValueError('Failed to retrieve dataset metadata.') + + return DatasetMetadata( + id=metadata.id, + name=metadata.name, + created_at=metadata.created_at, + modified_at=metadata.modified_at, + accessed_at=metadata.accessed_at, + item_count=int(metadata.item_count), + ) @classmethod async def open( diff --git a/src/apify/storage_clients/_apify/_key_value_store_client.py b/src/apify/storage_clients/_apify/_key_value_store_client.py index b422b464..49c9c4f6 100644 --- a/src/apify/storage_clients/_apify/_key_value_store_client.py +++ b/src/apify/storage_clients/_apify/_key_value_store_client.py @@ -11,12 +11,12 @@ from crawlee.storage_clients.models import KeyValueStoreRecord, KeyValueStoreRecordMetadata from ._api_client_creation import create_storage_api_client -from ._models import ApifyKeyValueStoreMetadata, KeyValueStoreListKeysPage +from ._models import ApifyKeyValueStoreMetadata if TYPE_CHECKING: from collections.abc import AsyncIterator - from apify_client.clients import KeyValueStoreClientAsync + from apify_client._resource_clients import KeyValueStoreClientAsync from apify import Configuration @@ -54,7 +54,18 @@ def __init__( @override async def get_metadata(self) -> ApifyKeyValueStoreMetadata: metadata = await self._api_client.get() - return ApifyKeyValueStoreMetadata.model_validate(metadata) + + if metadata is None: + raise ValueError('Failed to retrieve dataset metadata.') + + return ApifyKeyValueStoreMetadata( + id=metadata.id, + name=metadata.name, + created_at=metadata.created_at, + modified_at=metadata.modified_at, + accessed_at=metadata.accessed_at, + url_signing_secret_key=metadata.url_signing_secret_key, + ) @classmethod async def open( @@ -143,14 +154,12 @@ async def iterate_keys( count = 0 while True: - response = await self._api_client.list_keys(exclusive_start_key=exclusive_start_key) - list_key_page = KeyValueStoreListKeysPage.model_validate(response) + list_key_page = await self._api_client.list_keys(exclusive_start_key=exclusive_start_key) for item in list_key_page.items: - # Convert KeyValueStoreKeyInfo to KeyValueStoreRecordMetadata record_metadata = KeyValueStoreRecordMetadata( key=item.key, - size=item.size, + size=int(item.size), content_type='application/octet-stream', # Content type not available from list_keys ) yield record_metadata diff --git a/src/apify/storage_clients/_apify/_models.py b/src/apify/storage_clients/_apify/_models.py index d05f3394..3c64adc0 100644 --- a/src/apify/storage_clients/_apify/_models.py +++ b/src/apify/storage_clients/_apify/_models.py @@ -1,15 +1,27 @@ from __future__ import annotations from datetime import datetime, timedelta -from typing import Annotated +from typing import TYPE_CHECKING, Annotated from pydantic import BaseModel, ConfigDict, Field +from apify_client._models import RequestQueueStats from crawlee.storage_clients.models import KeyValueStoreMetadata, RequestQueueMetadata from apify import Request from apify._utils import docs_group +if TYPE_CHECKING: + from apify_client._models import LockedRequestQueueHead + +__all__ = [ + 'ApifyKeyValueStoreMetadata', + 'ApifyRequestQueueMetadata', + 'CachedRequest', + 'RequestQueueHead', + 'RequestQueueStats', +] + @docs_group('Storage data') class ApifyKeyValueStoreMetadata(KeyValueStoreMetadata): @@ -22,15 +34,6 @@ class ApifyKeyValueStoreMetadata(KeyValueStoreMetadata): """The secret key used for signing URLs for secure access to key-value store records.""" -@docs_group('Storage data') -class ProlongRequestLockResponse(BaseModel): - """Response to prolong request lock calls.""" - - model_config = ConfigDict(populate_by_name=True, extra='allow') - - lock_expires_at: Annotated[datetime, Field(alias='lockExpiresAt')] - - @docs_group('Storage data') class RequestQueueHead(BaseModel): """Model for request queue head. @@ -59,33 +62,21 @@ class RequestQueueHead(BaseModel): items: Annotated[list[Request], Field(alias='items', default_factory=list[Request])] """The list of request objects retrieved from the beginning of the queue.""" + @classmethod + def from_client_locked_head(cls, client_locked_head: LockedRequestQueueHead) -> RequestQueueHead: + """Create a `RequestQueueHead` from an Apify API client's `LockedRequestQueueHead` model. -class KeyValueStoreKeyInfo(BaseModel): - """Model for a key-value store key info. - - Only internal structure. - """ - - model_config = ConfigDict(populate_by_name=True, extra='allow') - - key: Annotated[str, Field(alias='key')] - size: Annotated[int, Field(alias='size')] - + Args: + client_locked_head: `LockedRequestQueueHead` instance from Apify API client. -class KeyValueStoreListKeysPage(BaseModel): - """Model for listing keys in the key-value store. + Returns: + `RequestQueueHead` instance with properly converted types. + """ + # Dump to dict with mode='json' to serialize special types like AnyUrl + head_dict = client_locked_head.model_dump(by_alias=True, mode='json') - Only internal structure. - """ - - model_config = ConfigDict(populate_by_name=True, extra='allow') - - count: Annotated[int, Field(alias='count')] - limit: Annotated[int, Field(alias='limit')] - is_truncated: Annotated[bool, Field(alias='isTruncated')] - items: Annotated[list[KeyValueStoreKeyInfo], Field(alias='items', default_factory=list)] - exclusive_start_key: Annotated[str | None, Field(alias='exclusiveStartKey', default=None)] - next_exclusive_start_key: Annotated[str | None, Field(alias='nextExclusiveStartKey', default=None)] + # Validate and construct RequestQueueHead from the serialized dict + return cls.model_validate(head_dict) class CachedRequest(BaseModel): @@ -107,25 +98,6 @@ class CachedRequest(BaseModel): """The expiration time of the lock on the request.""" -class RequestQueueStats(BaseModel): - model_config = ConfigDict(populate_by_name=True, extra='allow') - - delete_count: Annotated[int, Field(alias='deleteCount', default=0)] - """"The number of request queue deletes.""" - - head_item_read_count: Annotated[int, Field(alias='headItemReadCount', default=0)] - """The number of request queue head reads.""" - - read_count: Annotated[int, Field(alias='readCount', default=0)] - """The number of request queue reads.""" - - storage_bytes: Annotated[int, Field(alias='storageBytes', default=0)] - """Storage size in bytes.""" - - write_count: Annotated[int, Field(alias='writeCount', default=0)] - """The number of request queue writes.""" - - class ApifyRequestQueueMetadata(RequestQueueMetadata): stats: Annotated[RequestQueueStats, Field(alias='stats', default_factory=RequestQueueStats)] """Additional statistics about the request queue.""" diff --git a/src/apify/storage_clients/_apify/_request_queue_client.py b/src/apify/storage_clients/_apify/_request_queue_client.py index 9a589ec1..cc44c3c5 100644 --- a/src/apify/storage_clients/_apify/_request_queue_client.py +++ b/src/apify/storage_clients/_apify/_request_queue_client.py @@ -15,11 +15,10 @@ if TYPE_CHECKING: from collections.abc import Sequence - from apify_client.clients import RequestQueueClientAsync - from crawlee import Request + from apify_client._resource_clients import RequestQueueClientAsync from crawlee.storage_clients.models import AddRequestsResponse, ProcessedRequest, RequestQueueMetadata - from apify import Configuration + from apify import Configuration, Request logger = getLogger(__name__) @@ -77,26 +76,31 @@ async def get_metadata(self) -> ApifyRequestQueueMetadata: Returns: Request queue metadata with accurate counts and timestamps, combining API data with local estimates. """ - response = await self._api_client.get() + metadata = await self._api_client.get() - if response is None: + if metadata is None: raise ValueError('Failed to fetch request queue metadata from the API.') + total_request_count = int(metadata.total_request_count) + handled_request_count = int(metadata.handled_request_count) + pending_request_count = int(metadata.pending_request_count) + # Enhance API response with local estimations to account for propagation delays (API data can be delayed # by a few seconds, while local estimates are immediately accurate). return ApifyRequestQueueMetadata( - id=response['id'], - name=response['name'], - total_request_count=max(response['totalRequestCount'], self._implementation.metadata.total_request_count), - handled_request_count=max( - response['handledRequestCount'], self._implementation.metadata.handled_request_count + id=metadata.id, + name=metadata.name, + total_request_count=max(total_request_count, self._implementation.metadata.total_request_count), + handled_request_count=max(handled_request_count, self._implementation.metadata.handled_request_count), + pending_request_count=pending_request_count, + created_at=min(metadata.created_at, self._implementation.metadata.created_at), + modified_at=max(metadata.modified_at, self._implementation.metadata.modified_at), + accessed_at=max(metadata.accessed_at, self._implementation.metadata.accessed_at), + had_multiple_clients=metadata.had_multiple_clients or self._implementation.metadata.had_multiple_clients, + stats=RequestQueueStats.model_validate( + metadata.stats.model_dump(by_alias=True) if metadata.stats else {}, + by_alias=True, ), - pending_request_count=response['pendingRequestCount'], - created_at=min(response['createdAt'], self._implementation.metadata.created_at), - modified_at=max(response['modifiedAt'], self._implementation.metadata.modified_at), - accessed_at=max(response['accessedAt'], self._implementation.metadata.accessed_at), - had_multiple_clients=response['hadMultipleClients'] or self._implementation.metadata.had_multiple_clients, - stats=RequestQueueStats.model_validate(response['stats'], by_alias=True), ) @classmethod @@ -145,7 +149,7 @@ async def open( raw_metadata = await api_client.get() if raw_metadata is None: raise ValueError('Failed to retrieve request queue metadata from the API.') - metadata = ApifyRequestQueueMetadata.model_validate(raw_metadata) + metadata = ApifyRequestQueueMetadata.model_validate(raw_metadata.model_dump(by_alias=True)) return cls( api_client=api_client, diff --git a/src/apify/storage_clients/_apify/_request_queue_shared_client.py b/src/apify/storage_clients/_apify/_request_queue_shared_client.py index 5a160dad..496ec4f1 100644 --- a/src/apify/storage_clients/_apify/_request_queue_shared_client.py +++ b/src/apify/storage_clients/_apify/_request_queue_shared_client.py @@ -11,13 +11,14 @@ from crawlee.storage_clients.models import AddRequestsResponse, ProcessedRequest, RequestQueueMetadata from ._models import ApifyRequestQueueMetadata, CachedRequest, RequestQueueHead -from ._utils import unique_key_to_request_id -from apify import Request +from ._utils import to_crawlee_request, unique_key_to_request_id if TYPE_CHECKING: from collections.abc import Callable, Coroutine, Sequence - from apify_client.clients import RequestQueueClientAsync + from apify_client._resource_clients import RequestQueueClientAsync + + from apify import Request logger = getLogger(__name__) @@ -65,7 +66,7 @@ def __init__( """The Apify API client for communication with Apify platform.""" self._queue_head = deque[str]() - """Local cache of request IDs from the queue head for efficient fetching.""" + """Local cache of request IDs from the request queue head for efficient fetching.""" self._requests_cache: LRUCache[str, CachedRequest] = LRUCache(maxsize=cache_size) """LRU cache storing request objects, keyed by request ID.""" @@ -121,18 +122,17 @@ async def add_batch_of_requests( if new_requests: # Prepare requests for API by converting to dictionaries. - requests_dict = [ - request.model_dump( - by_alias=True, - ) - for request in new_requests - ] + requests_dict = [request.model_dump(by_alias=True) for request in new_requests] # Send requests to API. - api_response = AddRequestsResponse.model_validate( - await self._api_client.batch_add_requests(requests=requests_dict, forefront=forefront) + batch_response = await self._api_client.batch_add_requests( + requests=requests_dict, + forefront=forefront, ) + batch_response_dict = batch_response.model_dump(by_alias=True) + api_response = AddRequestsResponse.model_validate(batch_response_dict) + # Add the locally known already present processed requests based on the local cache. api_response.processed_requests.extend(already_present_requests) @@ -177,7 +177,7 @@ async def fetch_next_request(self) -> Request | None: if not self._queue_head: return None - # Get the next request ID from the queue head + # Get the next request ID from the request queue head next_request_id = self._queue_head.popleft() request = await self._get_or_hydrate_request(next_request_id) @@ -312,7 +312,7 @@ async def _get_request_by_id(self, request_id: str) -> Request | None: if response is None: return None - return Request.model_validate(response) + return to_crawlee_request(response) async def _ensure_head_is_non_empty(self) -> None: """Ensure that the queue head has requests if they are available in the queue.""" @@ -388,7 +388,7 @@ async def _update_request( ) return ProcessedRequest.model_validate( - {'uniqueKey': request.unique_key} | response, + {'uniqueKey': request.unique_key} | response.model_dump(by_alias=True), ) async def _list_head( @@ -431,19 +431,19 @@ async def _list_head( self._should_check_for_forefront_requests = False # Otherwise fetch from API - response = await self._api_client.list_and_lock_head( - lock_secs=int(self._DEFAULT_LOCK_TIME.total_seconds()), + locked_queue_head = await self._api_client.list_and_lock_head( + lock_duration=self._DEFAULT_LOCK_TIME, limit=limit, ) # Update the queue head cache - self._queue_has_locked_requests = response.get('queueHasLockedRequests', False) + self._queue_has_locked_requests = locked_queue_head.queue_has_locked_requests # Check if there is another client working with the RequestQueue - self.metadata.had_multiple_clients = response.get('hadMultipleClients', False) + self.metadata.had_multiple_clients = locked_queue_head.had_multiple_clients - for request_data in response.get('items', []): - request = Request.model_validate(request_data) - request_id = request_data.get('id') + for request_data in locked_queue_head.items: + request = to_crawlee_request(request_data) + request_id = request_data.id # Skip requests without ID or unique key if not request.unique_key or not request_id: @@ -473,7 +473,7 @@ async def _list_head( # After adding new requests to the forefront, any existing leftover locked request is kept in the end. self._queue_head.append(leftover_id) - return RequestQueueHead.model_validate(response) + return RequestQueueHead.from_client_locked_head(locked_queue_head) def _cache_request( self, diff --git a/src/apify/storage_clients/_apify/_request_queue_single_client.py b/src/apify/storage_clients/_apify/_request_queue_single_client.py index f6510e47..a42f7252 100644 --- a/src/apify/storage_clients/_apify/_request_queue_single_client.py +++ b/src/apify/storage_clients/_apify/_request_queue_single_client.py @@ -9,13 +9,14 @@ from crawlee.storage_clients.models import AddRequestsResponse, ProcessedRequest, RequestQueueMetadata -from ._utils import unique_key_to_request_id -from apify import Request +from ._utils import to_crawlee_request, unique_key_to_request_id if TYPE_CHECKING: from collections.abc import Sequence - from apify_client.clients import RequestQueueClientAsync + from apify_client._resource_clients import RequestQueueClientAsync + + from apify import Request logger = getLogger(__name__) @@ -147,22 +148,20 @@ async def add_batch_of_requests( if new_requests: # Prepare requests for API by converting to dictionaries. - requests_dict = [ - request.model_dump( - by_alias=True, - ) - for request in new_requests - ] + requests_dict = [request.model_dump(by_alias=True) for request in new_requests] # Send requests to API. - api_response = AddRequestsResponse.model_validate( - await self._api_client.batch_add_requests(requests=requests_dict, forefront=forefront) - ) + batch_response = await self._api_client.batch_add_requests(requests=requests_dict, forefront=forefront) + batch_response_dict = batch_response.model_dump(by_alias=True) + api_response = AddRequestsResponse.model_validate(batch_response_dict) + # Add the locally known already present processed requests based on the local cache. api_response.processed_requests.extend(already_present_requests) + # Remove unprocessed requests from the cache for unprocessed_request in api_response.unprocessed_requests: - self._requests_cache.pop(unique_key_to_request_id(unprocessed_request.unique_key), None) + request_id = unique_key_to_request_id(unprocessed_request.unique_key) + self._requests_cache.pop(request_id, None) else: api_response = AddRequestsResponse( @@ -295,16 +294,16 @@ async def _list_head(self) -> None: # Update metadata # Check if there is another client working with the RequestQueue - self.metadata.had_multiple_clients = response.get('hadMultipleClients', False) + self.metadata.had_multiple_clients = response.had_multiple_clients # Should warn once? This might be outside expected context if the other consumers consumes at the same time - if modified_at := response.get('queueModifiedAt'): - self.metadata.modified_at = max(self.metadata.modified_at, modified_at) + if response.queue_modified_at: + self.metadata.modified_at = max(self.metadata.modified_at, response.queue_modified_at) # Update the cached data - for request_data in response.get('items', []): - request = Request.model_validate(request_data) - request_id = request_data['id'] + for request_data in response.items: + request = to_crawlee_request(request_data) + request_id = request_data.id if request_id in self._requests_in_progress: # Ignore requests that are already in progress, we will not process them again. @@ -340,7 +339,7 @@ async def _get_request_by_id(self, id: str) -> Request | None: if response is None: return None - request = Request.model_validate(response) + request = to_crawlee_request(response) # Updated local caches if id in self._requests_in_progress: @@ -377,7 +376,7 @@ async def _update_request( ) return ProcessedRequest.model_validate( - {'uniqueKey': request.unique_key} | response, + {'uniqueKey': request.unique_key} | response.model_dump(by_alias=True), ) async def _init_caches(self) -> None: @@ -390,9 +389,12 @@ async def _init_caches(self) -> None: Local deduplication is cheaper, it takes 1 API call for whole cache and 1 read operation per request. """ response = await self._api_client.list_requests(limit=10_000) - for request_data in response.get('items', []): - request = Request.model_validate(request_data) - request_id = request_data['id'] + for request_data in response.items: + request_id = request_data.id + if request_id is None: + continue + + request = to_crawlee_request(request_data) if request.was_already_handled: # Cache just id for deduplication diff --git a/src/apify/storage_clients/_apify/_utils.py b/src/apify/storage_clients/_apify/_utils.py index 5d8174e8..c9bcd54d 100644 --- a/src/apify/storage_clients/_apify/_utils.py +++ b/src/apify/storage_clients/_apify/_utils.py @@ -7,7 +7,12 @@ from crawlee._utils.crypto import compute_short_hash +from apify import Request + if TYPE_CHECKING: + from apify_client._models import HeadRequest, LockedHeadRequest + from apify_client._models import Request as ClientRequest + from apify import Configuration @@ -39,3 +44,19 @@ def hash_api_base_url_and_token(configuration: Configuration) -> str: if configuration.api_public_base_url is None or configuration.token is None: raise ValueError("'Configuration.api_public_base_url' and 'Configuration.token' must be set.") return compute_short_hash(f'{configuration.api_public_base_url}{configuration.token}'.encode()) + + +def to_crawlee_request(client_request: ClientRequest | HeadRequest | LockedHeadRequest) -> Request: + """Convert an Apify API client's `Request` model to a Crawlee's `Request` model. + + Args: + client_request: Request instances from Apify API client. + + Returns: + `Request` instance from Crawlee with properly converted types. + """ + # Dump to dict with mode='json' to serialize special types like AnyUrl + request_dict = client_request.model_dump(by_alias=True, mode='json') + + # Validate and construct Crawlee Request from the serialized dict + return Request.model_validate(request_dict) diff --git a/tests/e2e/conftest.py b/tests/e2e/conftest.py index bdcc9883..6a29342f 100644 --- a/tests/e2e/conftest.py +++ b/tests/e2e/conftest.py @@ -6,6 +6,7 @@ import subprocess import sys import textwrap +from datetime import UTC, datetime, timedelta from pathlib import Path from typing import TYPE_CHECKING, Any, Protocol @@ -13,19 +14,20 @@ from filelock import FileLock from apify_client import ApifyClient, ApifyClientAsync -from apify_shared.consts import ActorJobStatus, ActorPermissionLevel, ActorSourceType, ApifyEnvVars from crawlee import service_locator import apify._actor from ._utils import generate_unique_resource_name -from apify._models import ActorRun +from apify._consts import ApifyEnvVars from apify.storage_clients._apify._alias_resolving import AliasResolver if TYPE_CHECKING: from collections.abc import Awaitable, Callable, Coroutine, Iterator, Mapping from decimal import Decimal - from apify_client.clients.resource_clients import ActorClientAsync + from apify_client._literals import ActorPermissionLevel + from apify_client._models import Run + from apify_client._resource_clients import ActorClientAsync _TOKEN_ENV_VAR = 'APIFY_TEST_USER_API_TOKEN' _API_URL_ENV_VAR = 'APIFY_INTEGRATION_TESTS_API_URL' @@ -47,7 +49,9 @@ def apify_client_async(apify_token: str) -> ApifyClientAsync: """Create an instance of the ApifyClientAsync.""" api_url = os.getenv(_API_URL_ENV_VAR) - return ApifyClientAsync(apify_token, api_url=api_url) + if api_url is not None: + return ApifyClientAsync(apify_token, api_url=api_url) + return ApifyClientAsync(apify_token) @pytest.fixture @@ -236,7 +240,13 @@ async def _make_actor( if (main_func and main_py) or (main_func and source_files) or (main_py and source_files): raise TypeError('Cannot specify more than one of `main_func`, `main_py` and `source_files` arguments') - client = ApifyClientAsync(token=apify_token, api_url=os.getenv(_API_URL_ENV_VAR)) + api_url = os.getenv(_API_URL_ENV_VAR) + client = ( + ApifyClientAsync(token=apify_token) + if api_url is None + else ApifyClientAsync(token=apify_token, api_url=api_url) + ) + actor_name = generate_unique_resource_name(label) # Get the source of main_func and convert it into a reasonable main_py file. @@ -299,30 +309,30 @@ async def _make_actor( name=actor_name, default_run_build='latest', default_run_memory_mbytes=memory_mbytes, - default_run_timeout_secs=600, + default_run_timeout=timedelta(seconds=600), versions=[ { 'versionNumber': '0.0', 'buildTag': 'latest', - 'sourceType': ActorSourceType.SOURCE_FILES, + 'sourceType': 'SOURCE_FILES', 'sourceFiles': source_files_for_api, } ], ) - actor_client = client.actor(created_actor['id']) + actor_client = client.actor(created_actor.id) print(f'Building Actor {actor_name}...') build_result = await actor_client.build(version_number='0.0') - build_client = client.build(build_result['id']) - build_client_result = await build_client.wait_for_finish(wait_secs=600) + build_client = client.build(build_result.id) + build_client_result = await build_client.wait_for_finish(wait_duration=timedelta(seconds=600)) assert build_client_result is not None - assert build_client_result['status'] == ActorJobStatus.SUCCEEDED + assert build_client_result.status == 'SUCCEEDED' # We only mark the client for cleanup if the build succeeded, so that if something goes wrong here, # you have a chance to check the error. - actors_for_cleanup.append(created_actor['id']) + actors_for_cleanup.append(created_actor.id) return actor_client @@ -330,17 +340,31 @@ async def _make_actor( # Delete all the generated Actors. for actor_id in actors_for_cleanup: - actor_client = ApifyClient(token=apify_token, api_url=os.getenv(_API_URL_ENV_VAR)).actor(actor_id) - - if (actor := actor_client.get()) is not None: - actor_client.update( - pricing_infos=[ - *actor.get('pricingInfos', []), - { - 'pricingModel': 'FREE', - }, - ] - ) + api_url = os.getenv(_API_URL_ENV_VAR) + + apify_client = ( + ApifyClient(token=apify_token) if api_url is None else ApifyClient(token=apify_token, api_url=api_url) + ) + + actor_client = apify_client.actor(actor_id) + actor = actor_client.get() + + if actor is not None and actor.pricing_infos is not None: + # Convert Pydantic models to dicts before mixing with plain dict + existing_pricing_infos = [pi.model_dump(by_alias=True, exclude_none=True) for pi in actor.pricing_infos] + # The API requires the new record to start strictly after all existing records. + latest_started_at = max(pi.started_at for pi in actor.pricing_infos) + new_started_at = max(latest_started_at, datetime.now(tz=UTC)) + timedelta(seconds=1) + new_pricing_infos = [ + *existing_pricing_infos, + { + 'pricingModel': 'FREE', + 'apifyMarginPercentage': 0.0, + 'createdAt': new_started_at.isoformat(), + 'startedAt': new_started_at.isoformat(), + }, + ] + actor_client.update(pricing_infos=new_pricing_infos) actor_client.delete() @@ -355,7 +379,7 @@ def __call__( run_input: Any = None, max_total_charge_usd: Decimal | None = None, force_permission_level: ActorPermissionLevel | None = None, - ) -> Coroutine[None, None, ActorRun]: + ) -> Coroutine[None, None, Run]: """Initiate an Actor run and wait for its completion. Args: @@ -382,19 +406,20 @@ async def _run_actor( run_input: Any = None, max_total_charge_usd: Decimal | None = None, force_permission_level: ActorPermissionLevel | None = None, - ) -> ActorRun: + ) -> Run: call_result = await actor.call( run_input=run_input, max_total_charge_usd=max_total_charge_usd, force_permission_level=force_permission_level, ) - assert isinstance(call_result, dict), 'The result of ActorClientAsync.call() is not a dictionary.' - assert 'id' in call_result, 'The result of ActorClientAsync.call() does not contain an ID.' + assert call_result is not None, 'Failed to start Actor run: missing run ID in the response.' + + run_client = apify_client_async.run(call_result.id) + client_actor_run = await run_client.wait_for_finish(wait_duration=timedelta(seconds=600)) - run_client = apify_client_async.run(call_result['id']) - run_result = await run_client.wait_for_finish(wait_secs=600) + assert client_actor_run is not None, 'Actor run did not finish successfully within the expected time.' - return ActorRun.model_validate(run_result) + return client_actor_run return _run_actor diff --git a/tests/e2e/test_actor_api_helpers.py b/tests/e2e/test_actor_api_helpers.py index 3747dd3b..759a5d31 100644 --- a/tests/e2e/test_actor_api_helpers.py +++ b/tests/e2e/test_actor_api_helpers.py @@ -2,14 +2,13 @@ import asyncio import json +from datetime import timedelta from typing import TYPE_CHECKING -from apify_shared.consts import ActorPermissionLevel from crawlee._utils.crypto import crypto_random_object_id from ._utils import generate_unique_resource_name from apify import Actor -from apify._models import ActorRun if TYPE_CHECKING: from apify_client import ApifyClientAsync @@ -62,7 +61,7 @@ async def test_actor_creates_new_client_instance( async def main() -> None: import os - from apify_shared.consts import ActorEnvVars + from apify._consts import ActorEnvVars async with Actor: new_client = Actor.new_client() @@ -129,12 +128,15 @@ async def main_outer() -> None: inner_run_status = await Actor.apify_client.actor(inner_actor_id).last_run().get() assert inner_run_status is not None - assert inner_run_status.get('status') in ['READY', 'RUNNING'] + assert inner_run_status.status in {'READY', 'RUNNING'} inner_actor = await make_actor(label='start-inner', main_func=main_inner) outer_actor = await make_actor(label='start-outer', main_func=main_outer) - inner_actor_id = (await inner_actor.get() or {})['id'] + inner_actor_get_result = await inner_actor.get() + assert inner_actor_get_result is not None, 'Failed to get inner actor ID' + + inner_actor_id = inner_actor_get_result.id test_value = crypto_random_object_id() run_result_outer = await run_actor( @@ -144,7 +146,7 @@ async def main_outer() -> None: assert run_result_outer.status == 'SUCCEEDED' - await inner_actor.last_run().wait_for_finish(wait_secs=600) + await inner_actor.last_run().wait_for_finish(wait_duration=timedelta(seconds=600)) inner_output_record = await inner_actor.last_run().key_value_store().get_record('OUTPUT') assert inner_output_record is not None @@ -172,14 +174,18 @@ async def main_outer() -> None: await Actor.call(inner_actor_id, run_input={'test_value': test_value}) - inner_run_status = await Actor.apify_client.actor(inner_actor_id).last_run().get() - assert inner_run_status is not None - assert inner_run_status.get('status') == 'SUCCEEDED' + run_result_inner = await Actor.apify_client.actor(inner_actor_id).last_run().get() + + assert run_result_inner is not None + assert run_result_inner.status == 'SUCCEEDED' inner_actor = await make_actor(label='call-inner', main_func=main_inner) outer_actor = await make_actor(label='call-outer', main_func=main_outer) - inner_actor_id = (await inner_actor.get() or {})['id'] + inner_actor_get_result = await inner_actor.get() + assert inner_actor_get_result is not None, 'Failed to get inner actor ID' + + inner_actor_id = inner_actor_get_result.id test_value = crypto_random_object_id() run_result_outer = await run_actor( @@ -189,7 +195,7 @@ async def main_outer() -> None: assert run_result_outer.status == 'SUCCEEDED' - await inner_actor.last_run().wait_for_finish(wait_secs=600) + await inner_actor.last_run().wait_for_finish(wait_duration=timedelta(seconds=600)) inner_output_record = await inner_actor.last_run().key_value_store().get_record('OUTPUT') assert inner_output_record is not None @@ -217,14 +223,18 @@ async def main_outer() -> None: await Actor.call_task(inner_task_id) - inner_run_status = await Actor.apify_client.task(inner_task_id).last_run().get() - assert inner_run_status is not None - assert inner_run_status.get('status') == 'SUCCEEDED' + run_result_inner = await Actor.apify_client.task(inner_task_id).last_run().get() + + assert run_result_inner is not None + assert run_result_inner.status == 'SUCCEEDED' inner_actor = await make_actor(label='call-task-inner', main_func=main_inner) outer_actor = await make_actor(label='call-task-outer', main_func=main_outer) - inner_actor_id = (await inner_actor.get() or {})['id'] + inner_actor_get_result = await inner_actor.get() + assert inner_actor_get_result is not None, 'Failed to get inner actor ID' + + inner_actor_id = inner_actor_get_result.id test_value = crypto_random_object_id() task = await apify_client_async.tasks().create( @@ -235,19 +245,19 @@ async def main_outer() -> None: run_result_outer = await run_actor( outer_actor, - run_input={'test_value': test_value, 'inner_task_id': task['id']}, - force_permission_level=ActorPermissionLevel.FULL_PERMISSIONS, + run_input={'test_value': test_value, 'inner_task_id': task.id}, + force_permission_level='FULL_PERMISSIONS', ) assert run_result_outer.status == 'SUCCEEDED' - await inner_actor.last_run().wait_for_finish(wait_secs=600) + await inner_actor.last_run().wait_for_finish(wait_duration=timedelta(seconds=600)) inner_output_record = await inner_actor.last_run().key_value_store().get_record('OUTPUT') assert inner_output_record is not None assert inner_output_record['value'] == f'{test_value}_XXX_{test_value}' - await apify_client_async.task(task['id']).delete() + await apify_client_async.task(task.id).delete() async def test_actor_aborts_another_actor_run( @@ -272,20 +282,24 @@ async def main_outer() -> None: inner_actor = await make_actor(label='abort-inner', main_func=main_inner) outer_actor = await make_actor(label='abort-outer', main_func=main_outer) - run_result_inner = await inner_actor.start(force_permission_level=ActorPermissionLevel.FULL_PERMISSIONS) - inner_run_id = run_result_inner['id'] + run_result_inner = await inner_actor.start(force_permission_level='FULL_PERMISSIONS') + inner_run_id = run_result_inner.id run_result_outer = await run_actor( outer_actor, run_input={'inner_run_id': inner_run_id}, - force_permission_level=ActorPermissionLevel.FULL_PERMISSIONS, + force_permission_level='FULL_PERMISSIONS', ) assert run_result_outer.status == 'SUCCEEDED' - await inner_actor.last_run().wait_for_finish(wait_secs=600) - inner_actor_last_run_dict = await inner_actor.last_run().get() - inner_actor_last_run = ActorRun.model_validate(inner_actor_last_run_dict) + inner_actor_run_client = inner_actor.last_run() + inner_actor_run = await inner_actor_run_client.wait_for_finish(wait_duration=timedelta(seconds=600)) + + if inner_actor_run is None: + raise AssertionError('Failed to get inner actor run after aborting it.') + + inner_actor_last_run = inner_actor_run assert inner_actor_last_run.status == 'ABORTED' @@ -300,7 +314,7 @@ async def test_actor_metamorphs_into_another_actor( async def main_inner() -> None: import os - from apify_shared.consts import ActorEnvVars + from apify._consts import ActorEnvVars async with Actor: assert os.getenv(ActorEnvVars.INPUT_KEY) is not None @@ -331,7 +345,10 @@ async def main_outer() -> None: inner_actor = await make_actor(label='metamorph-inner', main_func=main_inner) outer_actor = await make_actor(label='metamorph-outer', main_func=main_outer) - inner_actor_id = (await inner_actor.get() or {})['id'] + inner_actor_get_result = await inner_actor.get() + assert inner_actor_get_result is not None, 'Failed to get inner actor ID' + + inner_actor_id = inner_actor_get_result.id test_value = crypto_random_object_id() run_result_outer = await run_actor( @@ -395,7 +412,7 @@ async def main_server() -> None: import os from http.server import BaseHTTPRequestHandler, HTTPServer - from apify_shared.consts import ActorEnvVars + from apify._consts import ActorEnvVars webhook_body = '' @@ -425,16 +442,17 @@ def do_POST(self) -> None: await Actor.set_value('WEBHOOK_BODY', webhook_body) async def main_client() -> None: - from apify import Webhook, WebhookEventType + from apify import WebhookCondition, WebhookCreate async with Actor: actor_input = await Actor.get_input() or {} server_actor_container_url = str(actor_input.get('server_actor_container_url')) await Actor.add_webhook( - Webhook( - event_types=[WebhookEventType.ACTOR_RUN_SUCCEEDED], + WebhookCreate( + event_types=['ACTOR.RUN.SUCCEEDED'], request_url=server_actor_container_url, + condition=WebhookCondition(), ) ) @@ -444,7 +462,7 @@ async def main_client() -> None: ) server_actor_run = await server_actor.start() - server_actor_container_url = server_actor_run['containerUrl'] + server_actor_container_url = server_actor_run.container_url server_actor_initialized = await server_actor.last_run().key_value_store().get_record('INITIALIZED') while not server_actor_initialized: @@ -458,8 +476,13 @@ async def main_client() -> None: assert ac_run_result.status == 'SUCCEEDED' - sa_run_result_dict = await server_actor.last_run().wait_for_finish(wait_secs=600) - sa_run_result = ActorRun.model_validate(sa_run_result_dict) + sa_run_client = server_actor.last_run() + sa_run_client_run = await sa_run_client.wait_for_finish(wait_duration=timedelta(seconds=600)) + + if sa_run_client_run is None: + raise AssertionError('Failed to get server actor run after waiting for finish.') + + sa_run_result = sa_run_client_run assert sa_run_result.status == 'SUCCEEDED' diff --git a/tests/e2e/test_actor_charge.py b/tests/e2e/test_actor_charge.py index f8b1f393..04a6388a 100644 --- a/tests/e2e/test_actor_charge.py +++ b/tests/e2e/test_actor_charge.py @@ -6,16 +6,13 @@ import pytest_asyncio -from apify_shared.consts import ActorJobStatus - from apify import Actor -from apify._models import ActorRun if TYPE_CHECKING: from collections.abc import Iterable from apify_client import ApifyClientAsync - from apify_client.clients import ActorClientAsync + from apify_client._resource_clients import ActorClientAsync from .conftest import MakeActorFunction, RunActorFunction @@ -35,6 +32,9 @@ async def main() -> None: pricing_infos=[ { 'pricingModel': 'PAY_PER_EVENT', + 'apifyMarginPercentage': 0.0, + 'createdAt': '2024-01-01T00:00:00.000Z', + 'startedAt': '2024-01-01T00:00:00.000Z', 'pricingPerEvent': { 'actorChargeEvents': { 'push-item': { @@ -56,7 +56,7 @@ async def main() -> None: actor = await actor_client.get() assert actor is not None - return str(actor['id']) + return actor.id @pytest_asyncio.fixture(scope='function', loop_scope='module') @@ -85,6 +85,9 @@ async def main() -> None: pricing_infos=[ { 'pricingModel': 'PAY_PER_EVENT', + 'apifyMarginPercentage': 0.0, + 'createdAt': '2024-01-01T00:00:00.000Z', + 'startedAt': '2024-01-01T00:00:00.000Z', 'pricingPerEvent': { 'actorChargeEvents': { 'foobar': { @@ -95,13 +98,13 @@ async def main() -> None: }, }, }, - ] + ], ) actor = await actor_client.get() assert actor is not None - return str(actor['id']) + return str(actor.id) @pytest_asyncio.fixture(scope='function', loop_scope='module') @@ -129,11 +132,15 @@ async def test_actor_charge_basic( # Refetch until the platform gets its act together for is_last_attempt, _ in retry_counter(30): await asyncio.sleep(1) - updated_run = await apify_client_async.run(run.id).get() - run = ActorRun.model_validate(updated_run) + + run_client = apify_client_async.run(run.id) + updated_run = await run_client.get() + assert updated_run is not None, 'Updated run should not be None' + + run = updated_run try: - assert run.status == ActorJobStatus.SUCCEEDED + assert run.status == 'SUCCEEDED' assert run.charged_event_counts == {'foobar': 4} break except AssertionError: @@ -146,17 +153,21 @@ async def test_actor_charge_limit( run_actor: RunActorFunction, apify_client_async: ApifyClientAsync, ) -> None: - run = await run_actor(ppe_actor, max_total_charge_usd=Decimal('0.2')) + run_result = await run_actor(ppe_actor, max_total_charge_usd=Decimal('0.2')) # Refetch until the platform gets its act together for is_last_attempt, _ in retry_counter(30): await asyncio.sleep(1) - updated_run = await apify_client_async.run(run.id).get() - run = ActorRun.model_validate(updated_run) + + run_client = apify_client_async.run(run_result.id) + updated_run = await run_client.get() + assert updated_run is not None, 'Updated run should not be None' + + run_result = updated_run try: - assert run.status == ActorJobStatus.SUCCEEDED - assert run.charged_event_counts == {'foobar': 2} + assert run_result.status == 'SUCCEEDED' + assert run_result.charged_event_counts == {'foobar': 2} break except AssertionError: if is_last_attempt: @@ -177,10 +188,11 @@ async def test_actor_push_data_charges_both_events( for is_last_attempt, _ in retry_counter(120): await asyncio.sleep(1) updated_run = await apify_client_async.run(run.id).get() - run = ActorRun.model_validate(updated_run) + assert updated_run is not None + run = updated_run try: - assert run.status == ActorJobStatus.SUCCEEDED + assert run.status == 'SUCCEEDED' assert run.charged_event_counts == { 'push-item': 5, 'apify-default-dataset-item': 5, @@ -208,10 +220,11 @@ async def test_actor_push_data_combined_budget_limit( for is_last_attempt, _ in retry_counter(120): await asyncio.sleep(1) updated_run = await apify_client_async.run(run.id).get() - run = ActorRun.model_validate(updated_run) + assert updated_run is not None + run = updated_run try: - assert run.status == ActorJobStatus.SUCCEEDED + assert run.status == 'SUCCEEDED' assert run.charged_event_counts == { 'push-item': 2, 'apify-default-dataset-item': 2, diff --git a/tests/e2e/test_actor_events.py b/tests/e2e/test_actor_events.py index ce2bf399..d15561ae 100644 --- a/tests/e2e/test_actor_events.py +++ b/tests/e2e/test_actor_events.py @@ -3,8 +3,6 @@ import asyncio from typing import TYPE_CHECKING -from apify_shared.consts import ActorEventTypes - from apify import Actor if TYPE_CHECKING: @@ -22,29 +20,30 @@ async def main() -> None: from datetime import datetime from typing import Any - from apify_shared.consts import ActorEventTypes, ApifyEnvVars from crawlee.events._types import Event, EventSystemInfoData + from apify._consts import ApifyEnvVars + os.environ[ApifyEnvVars.PERSIST_STATE_INTERVAL_MILLIS] = '900' was_system_info_emitted = False system_infos = list[EventSystemInfoData]() - def on_event(event_type: ActorEventTypes) -> Callable: + def on_event(event_type: str) -> Callable: async def log_event(data: Any) -> None: nonlocal was_system_info_emitted nonlocal system_infos print(f'Got actor event ({event_type=}, {data=})') await Actor.push_data({'event_type': event_type, 'data': data}) - if event_type == ActorEventTypes.SYSTEM_INFO: + if event_type == 'systemInfo': was_system_info_emitted = True system_infos.append(data) return log_event async with Actor: - Actor.on(Event.SYSTEM_INFO, on_event(ActorEventTypes.SYSTEM_INFO)) - Actor.on(Event.PERSIST_STATE, on_event(ActorEventTypes.PERSIST_STATE)) + Actor.on(Event.SYSTEM_INFO, on_event('systemInfo')) + Actor.on(Event.PERSIST_STATE, on_event('persistState')) await asyncio.sleep(3) # The SYSTEM_INFO event sometimes takes a while to appear, let's wait for it for a while longer. @@ -63,11 +62,43 @@ async def log_event(data: Any) -> None: assert run_result.status == 'SUCCEEDED' dataset_items_page = await actor.last_run().dataset().list_items() - persist_state_events = [ - item for item in dataset_items_page.items if item['event_type'] == ActorEventTypes.PERSIST_STATE - ] - system_info_events = [ - item for item in dataset_items_page.items if item['event_type'] == ActorEventTypes.SYSTEM_INFO - ] + persist_state_events = [item for item in dataset_items_page.items if item['event_type'] == 'persistState'] + system_info_events = [item for item in dataset_items_page.items if item['event_type'] == 'systemInfo'] assert len(persist_state_events) > 2 assert len(system_info_events) > 0 + + +async def test_event_listener_can_be_removed_successfully( + make_actor: MakeActorFunction, + run_actor: RunActorFunction, +) -> None: + async def main() -> None: + import os + from typing import Any + + from crawlee.events._types import Event + + from apify._consts import ApifyEnvVars + + os.environ[ApifyEnvVars.PERSIST_STATE_INTERVAL_MILLIS] = '100' + + counter = 0 + + def count_event(data: Any) -> None: + nonlocal counter + print(data) + counter += 1 + + async with Actor: + Actor.on(Event.PERSIST_STATE, count_event) + await asyncio.sleep(0.5) + assert counter > 1 + last_count = counter + Actor.off(Event.PERSIST_STATE, count_event) + await asyncio.sleep(0.5) + assert counter == last_count + + actor = await make_actor(label='actor-off-event', main_func=main) + run_result = await run_actor(actor) + + assert run_result.status == 'SUCCEEDED' diff --git a/tests/e2e/test_actor_lifecycle.py b/tests/e2e/test_actor_lifecycle.py index 983b8ca3..10dc6925 100644 --- a/tests/e2e/test_actor_lifecycle.py +++ b/tests/e2e/test_actor_lifecycle.py @@ -8,6 +8,56 @@ from .conftest import MakeActorFunction, RunActorFunction +async def test_actor_init_and_double_init_prevention( + make_actor: MakeActorFunction, + run_actor: RunActorFunction, +) -> None: + async def main() -> None: + my_actor = Actor + await my_actor.init() + assert my_actor._active is True + double_init = False + try: + await my_actor.init() + double_init = True + except RuntimeError as err: + assert str(err) == 'The Actor was already initialized!' # noqa: PT017 + except Exception: + raise + try: + await Actor.init() + double_init = True + except RuntimeError as err: + assert str(err) == 'The Actor was already initialized!' # noqa: PT017 + except Exception: + raise + await my_actor.exit() + assert double_init is False + assert my_actor._active is False + + actor = await make_actor(label='actor-init', main_func=main) + run_result = await run_actor(actor) + + assert run_result.status == 'SUCCEEDED' + + +async def test_actor_init_correctly_in_async_with_block( + make_actor: MakeActorFunction, + run_actor: RunActorFunction, +) -> None: + async def main() -> None: + import apify._actor + + async with Actor: + assert apify._actor.Actor._active + assert apify._actor.Actor._active is False + + actor = await make_actor(label='with-actor-init', main_func=main) + run_result = await run_actor(actor) + + assert run_result.status == 'SUCCEEDED' + + async def test_actor_exit_with_different_exit_codes( make_actor: MakeActorFunction, run_actor: RunActorFunction, @@ -86,8 +136,8 @@ async def main() -> None: requests = ['https://example.com/1', 'https://example.com/2'] run = await Actor.apify_client.run(Actor.configuration.actor_run_id or '').get() - assert run - first_run = run.get('stats', {}).get('rebootCount', 0) == 0 + assert run is not None + first_run = run.stats.reboot_count == 0 @crawler.router.default_handler async def default_handler(context: BasicCrawlingContext) -> None: diff --git a/tests/e2e/test_actor_request_queue.py b/tests/e2e/test_actor_request_queue.py index 81f75919..5bdb4647 100644 --- a/tests/e2e/test_actor_request_queue.py +++ b/tests/e2e/test_actor_request_queue.py @@ -1,9 +1,9 @@ from __future__ import annotations +from datetime import timedelta from typing import TYPE_CHECKING from apify import Actor -from apify._models import ActorRun if TYPE_CHECKING: from apify_client import ApifyClientAsync @@ -32,14 +32,17 @@ async def main() -> None: actor = await make_actor(label='rq-clients-resurrection', main_func=main) run_result = await run_actor(actor) assert run_result.status == 'SUCCEEDED' - # Resurrect the run, the RequestQueue should still use same client key and thus not have multiple clients. run_client = apify_client_async.run(run_id=run_result.id) # Redirect logs even from the resurrected run streamed_log = await run_client.get_streamed_log(from_start=False) await run_client.resurrect() + async with streamed_log: - run_result = ActorRun.model_validate(await run_client.wait_for_finish(wait_secs=600)) + raw_run_result = await run_client.wait_for_finish(wait_duration=timedelta(seconds=600)) + assert raw_run_result is not None + + run_result = raw_run_result assert run_result.status == 'SUCCEEDED' diff --git a/tests/e2e/test_actor_scrapy.py b/tests/e2e/test_actor_scrapy.py index c7327b58..363b84e7 100644 --- a/tests/e2e/test_actor_scrapy.py +++ b/tests/e2e/test_actor_scrapy.py @@ -3,8 +3,6 @@ from pathlib import Path from typing import TYPE_CHECKING -from apify_shared.consts import ActorPermissionLevel - if TYPE_CHECKING: from .conftest import MakeActorFunction, RunActorFunction @@ -37,7 +35,7 @@ async def test_actor_scrapy_title_spider( 'allowedDomains': ['crawlee.dev'], 'proxyConfiguration': {'useApifyProxy': True}, }, - force_permission_level=ActorPermissionLevel.FULL_PERMISSIONS, + force_permission_level='FULL_PERMISSIONS', ) assert run_result.status == 'SUCCEEDED' diff --git a/tests/e2e/test_crawlee/conftest.py b/tests/e2e/test_crawlee/conftest.py index 9965e5cc..0f2344de 100644 --- a/tests/e2e/test_crawlee/conftest.py +++ b/tests/e2e/test_crawlee/conftest.py @@ -5,9 +5,8 @@ from typing import TYPE_CHECKING if TYPE_CHECKING: - from apify_client.clients.resource_clients import ActorClientAsync - - from apify._models import ActorRun + from apify_client._models import Run + from apify_client._resource_clients import ActorClientAsync _PYTHON_VERSION = f'{sys.version_info[0]}.{sys.version_info[1]}' @@ -34,7 +33,7 @@ def get_playwright_dockerfile() -> str: async def verify_crawler_results( actor: ActorClientAsync, - run_result: ActorRun, + run_result: Run, expected_crawler_type: str, ) -> None: """Verify dataset items and KVS record after a crawler Actor run.""" diff --git a/tests/e2e/test_fixtures.py b/tests/e2e/test_fixtures.py index 3e443a10..ec2a02bf 100644 --- a/tests/e2e/test_fixtures.py +++ b/tests/e2e/test_fixtures.py @@ -20,7 +20,7 @@ async def test_actor_from_main_func( async def main() -> None: import os - from apify_shared.consts import ActorEnvVars + from apify._consts import ActorEnvVars async with Actor: await Actor.set_value('OUTPUT', os.getenv(ActorEnvVars.ID)) diff --git a/tests/e2e/test_scrapy/conftest.py b/tests/e2e/test_scrapy/conftest.py index e19f6c36..93a09d79 100644 --- a/tests/e2e/test_scrapy/conftest.py +++ b/tests/e2e/test_scrapy/conftest.py @@ -4,9 +4,8 @@ from typing import TYPE_CHECKING if TYPE_CHECKING: - from apify_client.clients.resource_clients import ActorClientAsync - - from apify._models import ActorRun + from apify_client._models import Run + from apify_client._resource_clients import ActorClientAsync _ACTOR_SOURCE_DIR = Path(__file__).parent / 'actor_source' @@ -43,7 +42,7 @@ def get_scrapy_source_files( async def verify_spider_results( actor: ActorClientAsync, - run_result: ActorRun, + run_result: Run, *, expected_products: dict[str, dict[str, str]] | None = None, ) -> None: diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 30aa077d..512b2732 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -6,11 +6,11 @@ import pytest from apify_client import ApifyClientAsync -from apify_shared.consts import ApifyEnvVars from crawlee import service_locator import apify._actor from apify import Actor +from apify._consts import ApifyEnvVars from apify.storage_clients import ApifyStorageClient from apify.storage_clients._apify._alias_resolving import AliasResolver from apify.storages import RequestQueue @@ -37,8 +37,7 @@ def apify_token() -> str: def apify_client_async(apify_token: str) -> ApifyClientAsync: """Create an instance of the ApifyClientAsync.""" api_url = os.getenv(_API_URL_ENV_VAR) - - return ApifyClientAsync(apify_token, api_url=api_url) + return ApifyClientAsync(apify_token) if api_url is None else ApifyClientAsync(apify_token, api_url=api_url) @pytest.fixture diff --git a/tests/integration/test_dataset.py b/tests/integration/test_dataset.py index 5a5d1b92..2cb28b02 100644 --- a/tests/integration/test_dataset.py +++ b/tests/integration/test_dataset.py @@ -4,10 +4,9 @@ import pytest -from apify_shared.consts import ApifyEnvVars - from ._utils import generate_unique_resource_name from apify import Actor +from apify._consts import ApifyEnvVars from apify.storage_clients import ApifyStorageClient from apify.storages import Dataset @@ -117,7 +116,7 @@ async def test_force_cloud( try: dataset_details = await dataset_client.get() assert dataset_details is not None - assert dataset_details.get('name') == dataset_name + assert dataset_details.name == dataset_name dataset_items = await dataset_client.list_items() assert dataset_items.items == [dataset_item] diff --git a/tests/integration/test_key_value_store.py b/tests/integration/test_key_value_store.py index 912ecfb0..8673eb09 100644 --- a/tests/integration/test_key_value_store.py +++ b/tests/integration/test_key_value_store.py @@ -4,11 +4,11 @@ import pytest -from apify_shared.consts import ApifyEnvVars from crawlee import service_locator from ._utils import generate_unique_resource_name from apify import Actor +from apify._consts import ApifyEnvVars from apify.storage_clients import ApifyStorageClient from apify.storage_clients._apify._alias_resolving import AliasResolver from apify.storages import KeyValueStore @@ -156,7 +156,7 @@ async def test_force_cloud( try: key_value_store_details = await key_value_store_client.get() assert key_value_store_details is not None - assert key_value_store_details.get('name') == key_value_store_name + assert key_value_store_details.name == key_value_store_name key_value_store_record = await key_value_store_client.get_record('foo') assert key_value_store_record is not None diff --git a/tests/integration/test_request_queue.py b/tests/integration/test_request_queue.py index 496dcabc..8a2b0dc6 100644 --- a/tests/integration/test_request_queue.py +++ b/tests/integration/test_request_queue.py @@ -8,12 +8,13 @@ import pytest -from apify_shared.consts import ApifyEnvVars +from apify_client._models import BatchAddResult, RequestDraft from crawlee import service_locator from crawlee.crawlers import BasicCrawler from ._utils import call_with_exp_backoff, generate_unique_resource_name, poll_until_condition from apify import Actor, Request +from apify._consts import ApifyEnvVars from apify.storage_clients import ApifyStorageClient from apify.storage_clients._apify import ApifyRequestQueueClient from apify.storage_clients._apify._utils import unique_key_to_request_id @@ -887,7 +888,7 @@ async def test_request_queue_had_multiple_clients( # Check that it is correctly in the API api_response = await api_client.get() assert api_response - assert api_response['hadMultipleClients'] is True + assert api_response.had_multiple_clients is True async def test_request_queue_not_had_multiple_clients( @@ -906,7 +907,7 @@ async def test_request_queue_not_had_multiple_clients( api_client = apify_client_async.request_queue(request_queue_id=rq.id) api_response = await api_client.get() assert api_response - assert api_response['hadMultipleClients'] is False + assert api_response.had_multiple_clients is False async def test_request_queue_simple_and_full_at_the_same_time( @@ -1005,13 +1006,13 @@ async def _get_rq_metadata() -> ApifyRequestQueueMetadata: metadata = await poll_until_condition( _get_rq_metadata, - lambda m: m.stats.read_count - stats_before.read_count >= len(requests), + lambda m: (m.stats.read_count or 0) - (stats_before.read_count or 0) >= len(requests), ) stats_after = metadata.stats Actor.log.info(stats_after) # Cache was actually initialized, readCount increased - assert (stats_after.read_count - stats_before.read_count) == len(requests) + assert (stats_after.read_count or 0) - (stats_before.read_count or 0) == len(requests) # Deduplication happened locally, writeCount should be the same assert stats_after.write_count == stats_before.write_count @@ -1107,11 +1108,11 @@ async def test_force_cloud( request_queue_details = await request_queue_client.get() assert request_queue_details is not None - assert request_queue_details.get('name') == request_queue_apify.name + assert request_queue_details.name == request_queue_apify.name request_queue_request = await request_queue_client.get_request(request_info.id) assert request_queue_request is not None - assert request_queue_request['url'] == 'http://example.com' + assert str(request_queue_request.url) == 'http://example.com' async def test_request_queue_is_finished( @@ -1149,22 +1150,31 @@ async def test_request_queue_deduplication_unprocessed_requests( # Get raw client, because stats are not exposed in `RequestQueue` class, but are available in raw client rq_client = Actor.apify_client.request_queue(request_queue_id=request_queue_apify.id) _rq = await rq_client.get() - assert _rq - stats_before = _rq.get('stats', {}) + assert _rq is not None + stats_before = _rq.stats Actor.log.info(stats_before) - def return_unprocessed_requests(requests: list[dict], *_: Any, **__: Any) -> dict[str, list[dict]]: + assert stats_before is not None + assert stats_before.write_count is not None + + def return_unprocessed_requests(requests: list[dict], *_: Any, **__: Any) -> BatchAddResult: """Simulate API returning unprocessed requests.""" - return { - 'processedRequests': [], - 'unprocessedRequests': [ - {'url': request['url'], 'uniqueKey': request['uniqueKey'], 'method': request['method']} - for request in requests - ], - } + unprocessed_requests = [ + RequestDraft.model_construct( + url=request['url'], + unique_key=request['uniqueKey'], + method=request['method'], + ) + for request in requests + ] + + return BatchAddResult.model_construct( + processed_requests=[], + unprocessed_requests=unprocessed_requests, + ) with mock.patch( - 'apify_client.clients.resource_clients.request_queue.RequestQueueClientAsync.batch_add_requests', + 'apify_client._resource_clients.request_queue.RequestQueueClientAsync.batch_add_requests', side_effect=return_unprocessed_requests, ): # Simulate failed API call for adding requests. Request was not processed and should not be cached. @@ -1176,15 +1186,16 @@ def return_unprocessed_requests(requests: list[dict], *_: Any, **__: Any) -> dic # Poll until stats reflect the successful write. async def _get_rq_stats() -> dict: result = await rq_client.get() - return (result or {}).get('stats', {}) + return result.stats.model_dump(by_alias=True) if result and result.stats else {} - stats_after = await poll_until_condition( + _stats_before = stats_before.model_dump(by_alias=True) if stats_before else {} + stats_after_dict = await poll_until_condition( _get_rq_stats, - lambda s: s.get('writeCount', 0) - stats_before.get('writeCount', 0) >= 1, + lambda s: s.get('writeCount', 0) - _stats_before.get('writeCount', 0) >= 1, ) - Actor.log.info(stats_after) + Actor.log.info(stats_after_dict) - assert (stats_after['writeCount'] - stats_before['writeCount']) == 1 + assert (stats_after_dict['writeCount'] - _stats_before['writeCount']) == 1 async def test_request_queue_api_fail_when_marking_as_handled( @@ -1273,7 +1284,7 @@ async def test_request_queue_deduplication( rq_client = apify_client_async.request_queue(request_queue_id=rq.id) _rq = await rq_client.get() assert _rq - stats_before = _rq.get('stats', {}) + stats_before = _rq.stats # Add same request twice (same unique_key because same URL with default unique key) request1 = Request.from_url('http://example.com', method='POST') @@ -1282,16 +1293,17 @@ async def test_request_queue_deduplication( await rq.add_request(request2) # Poll until stats reflect the write. - async def _get_rq_stats() -> dict: + async def _get_rq_stats_dedup() -> dict: result = await rq_client.get() - return (result or {}).get('stats', {}) + return result.stats.model_dump(by_alias=True) if result and result.stats else {} - stats_after = await poll_until_condition( - _get_rq_stats, - lambda s: s.get('writeCount', 0) - stats_before.get('writeCount', 0) >= 1, + _stats_before_dedup = stats_before.model_dump(by_alias=True) if stats_before else {} + stats_after_dict = await poll_until_condition( + _get_rq_stats_dedup, + lambda s: s.get('writeCount', 0) - _stats_before_dedup.get('writeCount', 0) >= 1, ) - assert (stats_after['writeCount'] - stats_before['writeCount']) == 1 + assert (stats_after_dict['writeCount'] - _stats_before_dedup['writeCount']) == 1 async def test_request_queue_deduplication_use_extended_unique_key( @@ -1306,7 +1318,7 @@ async def test_request_queue_deduplication_use_extended_unique_key( rq_client = apify_client_async.request_queue(request_queue_id=rq.id) _rq = await rq_client.get() assert _rq - stats_before = _rq.get('stats', {}) + stats_before = _rq.stats request1 = Request.from_url('http://example.com', method='POST', use_extended_unique_key=True) request2 = Request.from_url('http://example.com', method='GET', use_extended_unique_key=True) @@ -1314,16 +1326,17 @@ async def test_request_queue_deduplication_use_extended_unique_key( await rq.add_request(request2) # Poll until stats reflect both writes. - async def _get_rq_stats() -> dict: + async def _get_rq_stats_ext() -> dict: result = await rq_client.get() - return (result or {}).get('stats', {}) + return result.stats.model_dump(by_alias=True) if result and result.stats else {} - stats_after = await poll_until_condition( - _get_rq_stats, - lambda s: s.get('writeCount', 0) - stats_before.get('writeCount', 0) >= 2, + _stats_before_ext = stats_before.model_dump(by_alias=True) if stats_before else {} + stats_after_dict = await poll_until_condition( + _get_rq_stats_ext, + lambda s: s.get('writeCount', 0) - _stats_before_ext.get('writeCount', 0) >= 2, ) - assert (stats_after['writeCount'] - stats_before['writeCount']) == 2 + assert (stats_after_dict['writeCount'] - _stats_before_ext['writeCount']) == 2 async def test_request_queue_parallel_deduplication( @@ -1340,7 +1353,7 @@ async def test_request_queue_parallel_deduplication( rq_client = apify_client_async.request_queue(request_queue_id=rq.id) _rq = await rq_client.get() assert _rq - stats_before = _rq.get('stats', {}) + stats_before = _rq.stats requests = [Request.from_url(f'http://example.com/{i}') for i in range(max_requests)] batch_size = iter(range(10, max_requests + 1, int(max_requests / worker_count))) @@ -1352,16 +1365,17 @@ async def add_requests_worker() -> None: await asyncio.gather(*add_requests_workers) # Poll until stats reflect all written requests. - async def _get_rq_stats() -> dict: + async def _get_rq_stats_concurrent() -> dict: result = await rq_client.get() - return (result or {}).get('stats', {}) + return result.stats.model_dump(by_alias=True) if result and result.stats else {} - stats_after = await poll_until_condition( - _get_rq_stats, - lambda s: s.get('writeCount', 0) - stats_before.get('writeCount', 0) >= len(requests), + _stats_before_concurrent = stats_before.model_dump(by_alias=True) if stats_before else {} + stats_after_dict = await poll_until_condition( + _get_rq_stats_concurrent, + lambda s: s.get('writeCount', 0) - _stats_before_concurrent.get('writeCount', 0) >= len(requests), ) - assert (stats_after['writeCount'] - stats_before['writeCount']) == len(requests) + assert (stats_after_dict['writeCount'] - _stats_before_concurrent['writeCount']) == len(requests) async def test_concurrent_processing_simulation(apify_token: str, monkeypatch: pytest.MonkeyPatch) -> None: diff --git a/tests/unit/actor/test_actor_charge.py b/tests/unit/actor/test_actor_charge.py index 4e452e78..8d09ef95 100644 --- a/tests/unit/actor/test_actor_charge.py +++ b/tests/unit/actor/test_actor_charge.py @@ -5,9 +5,10 @@ from typing import NamedTuple from unittest.mock import AsyncMock, Mock, patch +from apify_client._models import PayPerEventActorPricingInfo + from apify import Actor, Configuration from apify._charging import ChargingManagerImplementation, PricingInfoItem -from apify._models import PayPerEventActorPricingInfo class MockedChargingSetup(NamedTuple): @@ -32,7 +33,7 @@ async def setup_mocked_charging( setup.charging_mgr._pricing_info['event'] = PricingInfoItem(Decimal('1.0'), 'Event') result = await Actor.charge('event', count=1) - setup.mock_charge.assert_called_once_with('event', 1) + setup.mock_charge.assert_called_once_with('event', count=1) """ # Mock the ApifyClientAsync mock_client = Mock() @@ -76,7 +77,7 @@ async def test_actor_charge_push_data_with_no_remaining_budget() -> None: result1 = await Actor.charge('some-event', count=1) # Costs $1, leaving $0.5 # Verify the first charge call was made correctly - setup.mock_charge.assert_called_once_with('some-event', 1) + setup.mock_charge.assert_called_once_with('some-event', count=1) setup.mock_charge.reset_mock() assert result1.charged_count == 1 @@ -111,7 +112,7 @@ async def test_actor_charge_api_call_verification() -> None: # Call charge with count=1 - this SHOULD call the API result2 = await Actor.charge('test-event', count=1) - setup.mock_charge.assert_called_once_with('test-event', 1) + setup.mock_charge.assert_called_once_with('test-event', count=1) assert result2.charged_count == 1 @@ -124,15 +125,20 @@ async def test_max_event_charge_count_within_limit_tolerates_overdraw() -> None: actor_pricing_info=PayPerEventActorPricingInfo.model_validate( { 'pricingModel': 'PAY_PER_EVENT', + 'apifyMarginPercentage': 0.0, + 'createdAt': '2024-01-01T00:00:00.000Z', + 'startedAt': '2024-01-01T00:00:00.000Z', 'pricingPerEvent': { 'actorChargeEvents': { 'event': { 'eventPriceUsd': 0.0003, 'eventTitle': 'Event', + 'eventDescription': 'Event description', }, 'apify-actor-start': { 'eventPriceUsd': 0.00005, 'eventTitle': 'Actor start', + 'eventDescription': 'Actor start description', }, } }, @@ -235,15 +241,20 @@ async def test_charge_with_overdrawn_budget() -> None: actor_pricing_info=PayPerEventActorPricingInfo.model_validate( { 'pricingModel': 'PAY_PER_EVENT', + 'apifyMarginPercentage': 0.0, + 'createdAt': '2024-01-01T00:00:00.000Z', + 'startedAt': '2024-01-01T00:00:00.000Z', 'pricingPerEvent': { 'actorChargeEvents': { 'event': { 'eventPriceUsd': 0.0003, 'eventTitle': 'Event', + 'eventDescription': 'Event description', }, 'apify-actor-start': { 'eventPriceUsd': 0.00005, 'eventTitle': 'Actor start', + 'eventDescription': 'Actor start description', }, } }, diff --git a/tests/unit/actor/test_actor_create_proxy_configuration.py b/tests/unit/actor/test_actor_create_proxy_configuration.py index 86622b71..9d409ee2 100644 --- a/tests/unit/actor/test_actor_create_proxy_configuration.py +++ b/tests/unit/actor/test_actor_create_proxy_configuration.py @@ -6,9 +6,9 @@ import pytest from apify_client import ApifyClientAsync -from apify_shared.consts import ApifyEnvVars from apify import Actor +from apify._consts import ApifyEnvVars if TYPE_CHECKING: from pytest_httpserver import HTTPServer @@ -21,7 +21,7 @@ @pytest.fixture def patched_apify_client(apify_client_async_patcher: ApifyClientAsyncPatcher) -> ApifyClientAsync: - apify_client_async_patcher.patch('user', 'get', return_value={'proxy': {'password': DUMMY_PASSWORD}}) + apify_client_async_patcher.patch('user', 'get', return_value=Mock(proxy=Mock(password=DUMMY_PASSWORD))) return ApifyClientAsync() diff --git a/tests/unit/actor/test_actor_env_helpers.py b/tests/unit/actor/test_actor_env_helpers.py index 25e337bb..b539b5a2 100644 --- a/tests/unit/actor/test_actor_env_helpers.py +++ b/tests/unit/actor/test_actor_env_helpers.py @@ -8,18 +8,78 @@ from pydantic_core import TzInfo -from apify_shared.consts import ( - BOOL_ENV_VARS, - COMMA_SEPARATED_LIST_ENV_VARS, - DATETIME_ENV_VARS, - FLOAT_ENV_VARS, - INTEGER_ENV_VARS, - STRING_ENV_VARS, - ActorEnvVars, - ApifyEnvVars, -) - from apify import Actor +from apify._consts import ActorEnvVars, ApifyEnvVars + +INTEGER_ENV_VARS: list[ActorEnvVars | ApifyEnvVars] = [ + ActorEnvVars.MAX_PAID_DATASET_ITEMS, + ActorEnvVars.MEMORY_MBYTES, + ActorEnvVars.STANDBY_PORT, + ActorEnvVars.WEB_SERVER_PORT, + ApifyEnvVars.DEDICATED_CPUS, + ApifyEnvVars.LOG_LEVEL, + ApifyEnvVars.METAMORPH_AFTER_SLEEP_MILLIS, + ApifyEnvVars.PERSIST_STATE_INTERVAL_MILLIS, + ApifyEnvVars.PROXY_PORT, + ApifyEnvVars.SYSTEM_INFO_INTERVAL_MILLIS, +] + +FLOAT_ENV_VARS: list[ActorEnvVars | ApifyEnvVars] = [ + ActorEnvVars.MAX_TOTAL_CHARGE_USD, + ApifyEnvVars.MAX_USED_CPU_RATIO, +] + +BOOL_ENV_VARS: list[ApifyEnvVars] = [ + ApifyEnvVars.DISABLE_BROWSER_SANDBOX, + ApifyEnvVars.DISABLE_OUTDATED_WARNING, + ApifyEnvVars.HEADLESS, + ApifyEnvVars.IS_AT_HOME, + ApifyEnvVars.PERSIST_STORAGE, + ApifyEnvVars.PURGE_ON_START, + ApifyEnvVars.USER_IS_PAYING, +] + +DATETIME_ENV_VARS: list[ActorEnvVars] = [ + ActorEnvVars.STARTED_AT, + ActorEnvVars.TIMEOUT_AT, +] + +STRING_ENV_VARS: list[ActorEnvVars | ApifyEnvVars] = [ + ActorEnvVars.BUILD_ID, + ActorEnvVars.BUILD_NUMBER, + ActorEnvVars.DEFAULT_DATASET_ID, + ActorEnvVars.DEFAULT_KEY_VALUE_STORE_ID, + ActorEnvVars.DEFAULT_REQUEST_QUEUE_ID, + ActorEnvVars.EVENTS_WEBSOCKET_URL, + ActorEnvVars.FULL_NAME, + ActorEnvVars.ID, + ActorEnvVars.INPUT_KEY, + ActorEnvVars.PERMISSION_LEVEL, + ActorEnvVars.RUN_ID, + ActorEnvVars.STANDBY_URL, + ActorEnvVars.TASK_ID, + ActorEnvVars.WEB_SERVER_URL, + ApifyEnvVars.API_BASE_URL, + ApifyEnvVars.API_PUBLIC_BASE_URL, + ApifyEnvVars.DEFAULT_BROWSER_PATH, + ApifyEnvVars.FACT, + ApifyEnvVars.INPUT_SECRETS_PRIVATE_KEY_FILE, + ApifyEnvVars.INPUT_SECRETS_PRIVATE_KEY_PASSPHRASE, + ApifyEnvVars.LOCAL_STORAGE_DIR, + ApifyEnvVars.LOG_FORMAT, + ApifyEnvVars.META_ORIGIN, + ApifyEnvVars.PROXY_HOSTNAME, + ApifyEnvVars.PROXY_PASSWORD, + ApifyEnvVars.PROXY_STATUS_URL, + ApifyEnvVars.SDK_LATEST_VERSION, + ApifyEnvVars.TOKEN, + ApifyEnvVars.USER_ID, + ApifyEnvVars.WORKFLOW_KEY, +] + +COMMA_SEPARATED_LIST_ENV_VARS: list[ActorEnvVars] = [ + ActorEnvVars.BUILD_TAGS, +] if TYPE_CHECKING: from pathlib import Path diff --git a/tests/unit/actor/test_actor_helpers.py b/tests/unit/actor/test_actor_helpers.py index b90a5d13..40cd108e 100644 --- a/tests/unit/actor/test_actor_helpers.py +++ b/tests/unit/actor/test_actor_helpers.py @@ -9,56 +9,60 @@ import pytest from apify_client import ApifyClientAsync -from apify_shared.consts import ApifyEnvVars, WebhookEventType +from apify_client._models import Run from crawlee.events._types import Event -from apify import Actor, Webhook +from apify import Actor, Webhook, WebhookCondition, WebhookCreate from apify._actor import _ActorType +from apify._consts import ApifyEnvVars if TYPE_CHECKING: from ..conftest import ApifyClientAsyncPatcher @pytest.fixture -def fake_actor_run() -> dict: - return { - 'id': 'asdfasdf', - 'buildId': '3ads35', - 'buildNumber': '3.4.5', - 'actId': 'actor_id', - 'actorId': 'actor_id', - 'userId': 'user_id', - 'startedAt': '2024-08-08 12:12:44', - 'status': 'RUNNING', - 'meta': {'origin': 'API'}, - 'containerUrl': 'http://0.0.0.0:3333', - 'defaultDatasetId': 'dhasdrfughaerguoi', - 'defaultKeyValueStoreId': 'asjkldhguiofg', - 'defaultRequestQueueId': 'lkjgklserjghios', - 'stats': { - 'inputBodyLen': 0, - 'restartCount': 0, - 'resurrectCount': 0, - 'memAvgBytes': 0, - 'memMaxBytes': 0, - 'memCurrentBytes': 0, - 'cpuAvgUsage': 0, - 'cpuMaxUsage': 0, - 'cpuCurrentUsage': 0, - 'netRxBytes': 0, - 'netTxBytes': 0, - 'durationMillis': 3333, - 'runTimeSecs': 33, - 'metamorph': 0, - 'computeUnits': 4.33, - }, - 'options': { - 'build': '', - 'timeoutSecs': 44, - 'memoryMbytes': 4096, - 'diskMbytes': 16384, - }, - } +def fake_actor_run() -> Run: + return Run.model_validate( + { + 'id': 'asdfasdf', + 'buildId': '3ads35', + 'buildNumber': '3.4.5', + 'actId': 'actor_id', + 'actorId': 'actor_id', + 'userId': 'user_id', + 'startedAt': '2024-08-08T12:12:44Z', + 'status': 'RUNNING', + 'meta': {'origin': 'API'}, + 'containerUrl': 'http://0.0.0.0:3333', + 'defaultDatasetId': 'dhasdrfughaerguoi', + 'defaultKeyValueStoreId': 'asjkldhguiofg', + 'defaultRequestQueueId': 'lkjgklserjghios', + 'generalAccess': 'RESTRICTED', + 'stats': { + 'inputBodyLen': 0, + 'restartCount': 0, + 'resurrectCount': 0, + 'memAvgBytes': 0, + 'memMaxBytes': 0, + 'memCurrentBytes': 0, + 'cpuAvgUsage': 0, + 'cpuMaxUsage': 0, + 'cpuCurrentUsage': 0, + 'netRxBytes': 0, + 'netTxBytes': 0, + 'durationMillis': 3333, + 'runTimeSecs': 33, + 'metamorph': 0, + 'computeUnits': 4.33, + }, + 'options': { + 'build': '', + 'timeoutSecs': 44, + 'memoryMbytes': 4096, + 'diskMbytes': 16384, + }, + } + ) async def test_new_client_config_creation(monkeypatch: pytest.MonkeyPatch) -> None: @@ -79,7 +83,7 @@ async def test_new_client_config_creation(monkeypatch: pytest.MonkeyPatch) -> No await my_actor.exit() -async def test_call_actor(apify_client_async_patcher: ApifyClientAsyncPatcher, fake_actor_run: dict) -> None: +async def test_call_actor(apify_client_async_patcher: ApifyClientAsyncPatcher, fake_actor_run: Run) -> None: apify_client_async_patcher.patch('actor', 'call', return_value=fake_actor_run) actor_id = 'some-actor-id' @@ -91,7 +95,7 @@ async def test_call_actor(apify_client_async_patcher: ApifyClientAsyncPatcher, f assert apify_client_async_patcher.calls['actor']['call'][0][0][0].resource_id == actor_id -async def test_call_actor_task(apify_client_async_patcher: ApifyClientAsyncPatcher, fake_actor_run: dict) -> None: +async def test_call_actor_task(apify_client_async_patcher: ApifyClientAsyncPatcher, fake_actor_run: Run) -> None: apify_client_async_patcher.patch('task', 'call', return_value=fake_actor_run) task_id = 'some-task-id' @@ -102,7 +106,7 @@ async def test_call_actor_task(apify_client_async_patcher: ApifyClientAsyncPatch assert apify_client_async_patcher.calls['task']['call'][0][0][0].resource_id == task_id -async def test_start_actor(apify_client_async_patcher: ApifyClientAsyncPatcher, fake_actor_run: dict) -> None: +async def test_start_actor(apify_client_async_patcher: ApifyClientAsyncPatcher, fake_actor_run: Run) -> None: apify_client_async_patcher.patch('actor', 'start', return_value=fake_actor_run) actor_id = 'some-id' @@ -113,7 +117,7 @@ async def test_start_actor(apify_client_async_patcher: ApifyClientAsyncPatcher, assert apify_client_async_patcher.calls['actor']['start'][0][0][0].resource_id == actor_id -async def test_abort_actor_run(apify_client_async_patcher: ApifyClientAsyncPatcher, fake_actor_run: dict) -> None: +async def test_abort_actor_run(apify_client_async_patcher: ApifyClientAsyncPatcher, fake_actor_run: Run) -> None: apify_client_async_patcher.patch('run', 'abort', return_value=fake_actor_run) run_id = 'some-run-id' @@ -153,7 +157,11 @@ async def test_add_webhook_fails_locally(caplog: pytest.LogCaptureFixture) -> No caplog.set_level('WARNING') async with Actor: await Actor.add_webhook( - Webhook(event_types=[WebhookEventType.ACTOR_BUILD_ABORTED], request_url='https://example.com') + WebhookCreate( + event_types=['ACTOR.BUILD.ABORTED'], + request_url='https://example.com', + condition=WebhookCondition(), + ) ) matching = [r for r in caplog.records if 'Actor.add_webhook()' in r.message] @@ -235,7 +243,7 @@ async def test_remote_method_with_webhooks( actor_method = getattr(Actor, actor_method_name) await actor_method( entity_id, - webhooks=[Webhook(event_types=[WebhookEventType.ACTOR_RUN_SUCCEEDED], request_url='https://example.com')], + webhooks=[Webhook(event_types=['ACTOR.RUN.SUCCEEDED'], request_url='https://example.com')], ) calls = apify_client_async_patcher.calls[client_resource][client_method] @@ -264,7 +272,7 @@ async def test_remote_method_with_timedelta_timeout( calls = apify_client_async_patcher.calls[client_resource][client_method] assert len(calls) == 1 _, kwargs = calls[0][0], calls[0][1] - assert kwargs.get('timeout_secs') == 120 + assert kwargs.get('run_timeout') == timedelta(seconds=120) async def test_call_actor_with_remaining_time_deprecation( diff --git a/tests/unit/actor/test_actor_key_value_store.py b/tests/unit/actor/test_actor_key_value_store.py index 581d775d..94bd959f 100644 --- a/tests/unit/actor/test_actor_key_value_store.py +++ b/tests/unit/actor/test_actor_key_value_store.py @@ -4,12 +4,11 @@ import pytest -from apify_shared.consts import ApifyEnvVars from crawlee._utils.file import json_dumps from ..test_crypto import PRIVATE_KEY_PASSWORD, PRIVATE_KEY_PEM_BASE64, PUBLIC_KEY from apify import Actor -from apify._consts import ENCRYPTED_JSON_VALUE_PREFIX, ENCRYPTED_STRING_VALUE_PREFIX +from apify._consts import ENCRYPTED_JSON_VALUE_PREFIX, ENCRYPTED_STRING_VALUE_PREFIX, ApifyEnvVars from apify._crypto import public_encrypt diff --git a/tests/unit/actor/test_actor_lifecycle.py b/tests/unit/actor/test_actor_lifecycle.py index 03fdd00e..04dbd4d8 100644 --- a/tests/unit/actor/test_actor_lifecycle.py +++ b/tests/unit/actor/test_actor_lifecycle.py @@ -2,16 +2,22 @@ import asyncio import contextlib +import json import logging -from typing import TYPE_CHECKING -from unittest.mock import AsyncMock +from datetime import UTC, datetime +from typing import TYPE_CHECKING, Any +from unittest import mock +from unittest.mock import AsyncMock, Mock import pytest +import websockets +import websockets.asyncio.server -from apify_shared.consts import ActorExitCodes, ApifyEnvVars -from crawlee.events._types import Event +from apify_client._models import Run +from crawlee.events._types import Event, EventPersistStateData from apify import Actor +from apify._consts import ActorEnvVars, ActorExitCodes, ApifyEnvVars if TYPE_CHECKING: from collections.abc import AsyncGenerator, Callable @@ -212,6 +218,90 @@ def on_event(event_type: Event) -> Callable: assert on_system_info_count == len(on_system_info) +async def test_actor_handles_migrating_event_correctly(monkeypatch: pytest.MonkeyPatch) -> None: + """Test that Actor handles MIGRATING events correctly by emitting PERSIST_STATE.""" + # This should test whether when you get a MIGRATING event, + # the Actor automatically emits the PERSIST_STATE event with data `{'isMigrating': True}` + monkeypatch.setenv(ApifyEnvVars.IS_AT_HOME, '1') + monkeypatch.setenv(ActorEnvVars.RUN_ID, 'asdf') + + persist_state_events_data = [] + + def log_persist_state(data: Any) -> None: + nonlocal persist_state_events_data + persist_state_events_data.append(data) + + async def handler(websocket: websockets.asyncio.server.ServerConnection) -> None: + await websocket.wait_closed() + + async with websockets.asyncio.server.serve(handler, host='localhost') as ws_server: + port: int = ws_server.sockets[0].getsockname()[1] + monkeypatch.setenv(ActorEnvVars.EVENTS_WEBSOCKET_URL, f'ws://localhost:{port}') + + mock_run_client = Mock() + mock_run_client.run.return_value.get = AsyncMock( + side_effect=lambda: Run.model_validate( + { + 'id': 'asdf', + 'actId': 'asdf', + 'userId': 'adsf', + 'startedAt': datetime.now(UTC).isoformat(), + 'status': 'RUNNING', + 'meta': {'origin': 'DEVELOPMENT'}, + 'buildId': 'hjkl', + 'defaultDatasetId': 'hjkl', + 'defaultKeyValueStoreId': 'hjkl', + 'defaultRequestQueueId': 'hjkl', + 'containerUrl': 'https://hjkl', + 'buildNumber': '0.0.1', + 'generalAccess': 'RESTRICTED', + 'stats': { + 'restartCount': 0, + 'resurrectCount': 0, + 'computeUnits': 1, + }, + 'options': { + 'build': 'asdf', + 'timeoutSecs': 4, + 'memoryMbytes': 1024, + 'diskMbytes': 1024, + }, + } + ) + ) + + with mock.patch.object(Actor, 'new_client', return_value=mock_run_client): + async with Actor: + Actor.on(Event.PERSIST_STATE, log_persist_state) + await asyncio.sleep(2) + + for socket in ws_server.connections: + await socket.send( + json.dumps( + { + 'name': 'migrating', + 'data': { + 'isMigrating': True, + }, + } + ) + ) + + await asyncio.sleep(1) + + # It is enough to check the persist state event we send manually and the crawler final one. + assert len(persist_state_events_data) >= 2 + + # Expect last event to be is_migrating=False (persistence event on exiting EventManager) + assert persist_state_events_data.pop() == EventPersistStateData(is_migrating=False) + # Expect second last event to be is_migrating=True (emitted on MIGRATING event) + assert persist_state_events_data.pop() == EventPersistStateData(is_migrating=True) + + # Check if all the other events are regular persist state events + for event_data in persist_state_events_data: + assert event_data == EventPersistStateData(is_migrating=False) + + async def test_actor_fail_prevents_further_execution(caplog: pytest.LogCaptureFixture) -> None: """Test that calling Actor.fail() prevents further code execution in the Actor context.""" caplog.set_level(logging.INFO) diff --git a/tests/unit/actor/test_charging_manager.py b/tests/unit/actor/test_charging_manager.py index 6fb84b2c..b2047485 100644 --- a/tests/unit/actor/test_charging_manager.py +++ b/tests/unit/actor/test_charging_manager.py @@ -6,13 +6,14 @@ import pytest -from apify._charging import ChargingManagerImplementation -from apify._configuration import Configuration -from apify._models import ( +from apify_client._models import ( ActorChargeEvent, PayPerEventActorPricingInfo, ) +from apify._charging import ChargingManagerImplementation +from apify._configuration import Configuration + def _make_config(**kwargs: Any) -> Configuration: """Helper to create a Configuration with sensible defaults for charging tests. @@ -49,12 +50,17 @@ def _make_ppe_pricing_info(events: dict[str, Decimal] | None = None) -> PayPerEv if events is None: events = {'search': Decimal('0.01'), 'scrape': Decimal('0.05')} charge_events = { - name: ActorChargeEvent.model_validate({'eventPriceUsd': price, 'eventTitle': f'{name} event'}) + name: ActorChargeEvent.model_validate( + {'eventPriceUsd': price, 'eventTitle': f'{name} event', 'eventDescription': f'{name} event description'} + ) for name, price in events.items() } return PayPerEventActorPricingInfo.model_validate( { 'pricingModel': 'PAY_PER_EVENT', + 'apifyMarginPercentage': 0.0, + 'createdAt': '2024-01-01T00:00:00.000Z', + 'startedAt': '2024-01-01T00:00:00.000Z', 'pricingPerEvent': { 'actorChargeEvents': {name: event.model_dump(by_alias=True) for name, event in charge_events.items()} }, diff --git a/tests/unit/actor/test_configuration.py b/tests/unit/actor/test_configuration.py index 1448ecca..0fd686dd 100644 --- a/tests/unit/actor/test_configuration.py +++ b/tests/unit/actor/test_configuration.py @@ -320,8 +320,17 @@ def test_actor_pricing_info_from_json_env_var(monkeypatch: pytest.MonkeyPatch) - pricing_json = json.dumps( { 'pricingModel': 'PAY_PER_EVENT', + 'apifyMarginPercentage': 0.0, + 'createdAt': '2024-01-01T00:00:00.000Z', + 'startedAt': '2024-01-01T00:00:00.000Z', 'pricingPerEvent': { - 'actorChargeEvents': {'search': {'eventPriceUsd': '0.01', 'eventTitle': 'Search event'}} + 'actorChargeEvents': { + 'search': { + 'eventPriceUsd': '0.01', + 'eventTitle': 'Search event', + 'eventDescription': 'Search event description', + } + } }, } ) @@ -331,6 +340,39 @@ def test_actor_pricing_info_from_json_env_var(monkeypatch: pytest.MonkeyPatch) - assert config.actor_pricing_info.pricing_model == 'PAY_PER_EVENT' +def test_actor_pricing_info_env_var_tolerates_platform_omissions(monkeypatch: pytest.MonkeyPatch) -> None: + """The platform env var may omit fields that apify-client models require; they should be injected with defaults.""" + + pricing_json = json.dumps( + { + 'pricingModel': 'PAY_PER_EVENT', + 'pricingPerEvent': { + 'actorChargeEvents': { + 'search': { + 'eventPriceUsd': '0.01', + 'eventTitle': 'Search event', + } + } + }, + } + ) + monkeypatch.setenv('APIFY_ACTOR_PRICING_INFO', pricing_json) + config = ApifyConfiguration() + assert config.actor_pricing_info is not None + assert config.actor_pricing_info.pricing_model == 'PAY_PER_EVENT' + + +@pytest.mark.parametrize('env_value', ['', '{}']) +def test_actor_pricing_info_env_var_empty_becomes_none(monkeypatch: pytest.MonkeyPatch, env_value: str) -> None: + """Platform sends `APIFY_ACTOR_PRICING_INFO={}` for Actors without a pricing model. + + Without a `pricingModel` discriminator, the pydantic union cannot resolve — treat it as no pricing info. + """ + monkeypatch.setenv('APIFY_ACTOR_PRICING_INFO', env_value) + config = ApifyConfiguration() + assert config.actor_pricing_info is None + + def test_actor_storage_json_env_var(monkeypatch: pytest.MonkeyPatch) -> None: """Test that actor_storages_json is parsed from JSON env var.""" datasets = {'default': 'default_dataset_id', 'custom': 'custom_dataset_id'} diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index 3cd2e32c..c289a3dd 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -12,11 +12,11 @@ from pytest_httpserver import HTTPServer from apify_client import ApifyClientAsync -from apify_shared.consts import ApifyEnvVars from crawlee import service_locator import apify._actor import apify.log +from apify._consts import ApifyEnvVars from apify.storage_clients._apify._alias_resolving import AliasResolver if TYPE_CHECKING: @@ -62,6 +62,7 @@ def prepare_test_env(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> Callabl def _prepare_test_env() -> None: if hasattr(apify._actor.Actor, '__wrapped__'): delattr(apify._actor.Actor, '__wrapped__') + apify._actor.Actor._active = False # Set the environment variable for the local storage directory to the temporary path. diff --git a/tests/unit/events/test_apify_event_manager.py b/tests/unit/events/test_apify_event_manager.py index eb8dd375..2b8ff412 100644 --- a/tests/unit/events/test_apify_event_manager.py +++ b/tests/unit/events/test_apify_event_manager.py @@ -14,10 +14,10 @@ import websockets.asyncio.server import websockets.exceptions -from apify_shared.consts import ActorEnvVars from crawlee.events._types import Event from apify import Configuration +from apify._consts import ActorEnvVars from apify.events import ApifyEventManager from apify.events._types import SystemInfoEventData diff --git a/tests/unit/storage_clients/test_apify_kvs_client.py b/tests/unit/storage_clients/test_apify_kvs_client.py index 4e5b4c6b..5bba9b7d 100644 --- a/tests/unit/storage_clients/test_apify_kvs_client.py +++ b/tests/unit/storage_clients/test_apify_kvs_client.py @@ -6,6 +6,8 @@ import pytest +from apify_client._models import ListOfKeys + from apify.storage_clients._apify._key_value_store_client import ApifyKeyValueStoreClient @@ -53,13 +55,18 @@ async def test_iterate_keys_single_page() -> None: """Test iterating keys with a single page of results.""" api_client = AsyncMock() api_client.list_keys = AsyncMock( - return_value={ - 'items': [{'key': 'key1', 'size': 100}, {'key': 'key2', 'size': 200}], - 'count': 2, - 'limit': 1000, - 'isTruncated': False, - 'nextExclusiveStartKey': None, - } + return_value=ListOfKeys.model_validate( + { + 'items': [ + {'key': 'key1', 'size': 100, 'recordPublicUrl': 'https://example.com/key1'}, + {'key': 'key2', 'size': 200, 'recordPublicUrl': 'https://example.com/key2'}, + ], + 'count': 2, + 'limit': 1000, + 'isTruncated': False, + 'nextExclusiveStartKey': None, + } + ) ) client, _ = _make_kvs_client(api_client=api_client) @@ -73,13 +80,17 @@ async def test_iterate_keys_with_limit() -> None: """Test that iterate_keys respects the limit parameter.""" api_client = AsyncMock() api_client.list_keys = AsyncMock( - return_value={ - 'items': [{'key': f'key{i}', 'size': 100} for i in range(5)], - 'count': 5, - 'limit': 1000, - 'isTruncated': True, - 'nextExclusiveStartKey': 'key4', - } + return_value=ListOfKeys.model_validate( + { + 'items': [ + {'key': f'key{i}', 'size': 100, 'recordPublicUrl': f'https://example.com/key{i}'} for i in range(5) + ], + 'count': 5, + 'limit': 1000, + 'isTruncated': True, + 'nextExclusiveStartKey': 'key4', + } + ) ) client, _ = _make_kvs_client(api_client=api_client) @@ -89,20 +100,24 @@ async def test_iterate_keys_with_limit() -> None: async def test_iterate_keys_pagination() -> None: """Test that iterate_keys handles pagination across multiple pages.""" - page1 = { - 'items': [{'key': 'key1', 'size': 100}], - 'count': 1, - 'limit': 1000, - 'isTruncated': True, - 'nextExclusiveStartKey': 'key1', - } - page2 = { - 'items': [{'key': 'key2', 'size': 200}], - 'count': 1, - 'limit': 1000, - 'isTruncated': False, - 'nextExclusiveStartKey': None, - } + page1 = ListOfKeys.model_validate( + { + 'items': [{'key': 'key1', 'size': 100, 'recordPublicUrl': 'https://example.com/key1'}], + 'count': 1, + 'limit': 1000, + 'isTruncated': True, + 'nextExclusiveStartKey': 'key1', + } + ) + page2 = ListOfKeys.model_validate( + { + 'items': [{'key': 'key2', 'size': 200, 'recordPublicUrl': 'https://example.com/key2'}], + 'count': 1, + 'limit': 1000, + 'isTruncated': False, + 'nextExclusiveStartKey': None, + } + ) api_client = AsyncMock() api_client.list_keys = AsyncMock(side_effect=[page1, page2]) client, _ = _make_kvs_client(api_client=api_client) diff --git a/tests/unit/storage_clients/test_apify_request_queue_client.py b/tests/unit/storage_clients/test_apify_request_queue_client.py index 3b59b844..ae110adf 100644 --- a/tests/unit/storage_clients/test_apify_request_queue_client.py +++ b/tests/unit/storage_clients/test_apify_request_queue_client.py @@ -5,6 +5,7 @@ import pytest +from apify_client._models import RequestQueueHead from crawlee.storage_clients.models import RequestQueueMetadata from apify.storage_clients._apify._request_queue_single_client import ApifyRequestQueueSingleClient @@ -78,7 +79,14 @@ def test_unique_key_to_request_id_matches_known_values(unique_key: str, expected ) async def test_list_head_limit(in_progress_count: int, expected_limit: int) -> None: client, api_client = _make_single_client() - api_client.list_head = AsyncMock(return_value={'items': [], 'hadMultipleClients': False}) + api_client.list_head = AsyncMock( + return_value=RequestQueueHead( + limit=expected_limit, + queue_modified_at=datetime.now(tz=UTC), + had_multiple_clients=False, + items=[], + ) + ) client._requests_in_progress = {f'req_{i}' for i in range(in_progress_count)} await client._list_head() diff --git a/tests/unit/test_proxy_configuration.py b/tests/unit/test_proxy_configuration.py index e0f65f78..d11ec1da 100644 --- a/tests/unit/test_proxy_configuration.py +++ b/tests/unit/test_proxy_configuration.py @@ -11,8 +11,8 @@ import pytest from apify_client import ApifyClientAsync -from apify_shared.consts import ApifyEnvVars +from apify._consts import ApifyEnvVars from apify._proxy_configuration import ProxyConfiguration, is_url if TYPE_CHECKING: @@ -26,15 +26,7 @@ @pytest.fixture def patched_apify_client(apify_client_async_patcher: ApifyClientAsyncPatcher) -> ApifyClientAsync: - apify_client_async_patcher.patch( - 'user', - 'get', - return_value={ - 'proxy': { - 'password': DUMMY_PASSWORD, - }, - }, - ) + apify_client_async_patcher.patch('user', 'get', return_value=Mock(proxy=Mock(password=DUMMY_PASSWORD))) return ApifyClientAsync() diff --git a/uv.lock b/uv.lock index bfdb8297..05840d17 100644 --- a/uv.lock +++ b/uv.lock @@ -40,14 +40,13 @@ version = "3.4.1" source = { editable = "." } dependencies = [ { name = "apify-client" }, - { name = "apify-shared" }, { name = "cachetools" }, { name = "crawlee" }, { name = "cryptography" }, { name = "impit" }, { name = "lazy-object-proxy" }, { name = "more-itertools" }, - { name = "pydantic" }, + { name = "pydantic", extra = ["email"] }, { name = "typing-extensions" }, { name = "websockets" }, { name = "yarl" }, @@ -84,15 +83,14 @@ dev = [ [package.metadata] requires-dist = [ - { name = "apify-client", specifier = ">=2.3.0,<3.0.0" }, - { name = "apify-shared", specifier = ">=2.0.0,<3.0.0" }, + { name = "apify-client", specifier = ">=3.0.0,<4.0.0" }, { name = "cachetools", specifier = ">=5.5.0" }, { name = "crawlee", specifier = ">=1.0.4,<2.0.0" }, { name = "cryptography", specifier = ">=42.0.0" }, { name = "impit", specifier = ">=0.8.0" }, { name = "lazy-object-proxy", specifier = ">=1.11.0" }, { name = "more-itertools", specifier = ">=10.2.0" }, - { name = "pydantic", specifier = ">=2.11.0" }, + { name = "pydantic", extras = ["email"], specifier = ">=2.11.0" }, { name = "scrapy", marker = "extra == 'scrapy'", specifier = ">=2.14.0" }, { name = "typing-extensions", specifier = ">=4.1.0" }, { name = "websockets", specifier = ">=14.0" }, @@ -126,26 +124,17 @@ dev = [ [[package]] name = "apify-client" -version = "2.5.0" +version = "3.0.2" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "apify-shared" }, { name = "colorama" }, { name = "impit" }, { name = "more-itertools" }, + { name = "pydantic", extra = ["email"] }, ] -sdist = { url = "https://files.pythonhosted.org/packages/78/6a/b872d6bbc84c6aaf27b455492c6ff1bd057fea302c5d40619c733d48a718/apify_client-2.5.0.tar.gz", hash = "sha256:daa2af6a50e573f78bd46a4728a3f2be76cee93cf5c4ff9d0fd38b6756792689", size = 377916, upload-time = "2026-02-18T13:03:16.083Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/2b/82/4fe19adfa6b962ab8a740782b6246b7c499f13edccac24733f015d895725/apify_client-2.5.0-py3-none-any.whl", hash = "sha256:4aa6172bed92d83f2d2bbe1f95cfaab2e147a834dfa007e309fd0b4709423316", size = 86996, upload-time = "2026-02-18T13:03:14.891Z" }, -] - -[[package]] -name = "apify-shared" -version = "2.2.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ec/88/8833a8bba9044ce134bb2e57fbb626f1ddbeecac964bc2e2b652a50fadd1/apify_shared-2.2.0.tar.gz", hash = "sha256:ad48a96084e3c38faa1bac723a47929a1bb2c771544da2f0cb503eabdecfc79a", size = 45534, upload-time = "2026-01-15T10:17:14.592Z" } +sdist = { url = "https://files.pythonhosted.org/packages/64/08/5d6c2fa64fd93b837325368aedaa23f57897030eff31a150873f3aa697e3/apify_client-3.0.2.tar.gz", hash = "sha256:e34dc9c8ac951154a0382adfce785d9658639c373425b43ac54df510d076ac0d", size = 122200, upload-time = "2026-05-26T07:23:43.691Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/75/7c/9607852e2bb324fa40a5b967e162dea1b3c76b429cf90b602e4a202c101a/apify_shared-2.2.0-py3-none-any.whl", hash = "sha256:667d4d00ac3cf8091702640547387ac5c72a1df402bbb3923f7a401bc25d9d50", size = 16408, upload-time = "2026-01-15T10:17:13.103Z" }, + { url = "https://files.pythonhosted.org/packages/d2/32/ac2ab06f8713f9a1313912815f57431d495cf04861a9f9a2436310dcf38e/apify_client-3.0.2-py3-none-any.whl", hash = "sha256:d78e188eb98e8357a21b57d9b31b139410b517eecc2d125c80f054cdc604296f", size = 141197, upload-time = "2026-05-26T07:23:42.039Z" }, ] [[package]] @@ -713,6 +702,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/33/6b/e0547afaf41bf2c42e52430072fa5658766e3d65bd4b03a563d1b6336f57/distlib-0.4.0-py2.py3-none-any.whl", hash = "sha256:9659f7d87e46584a30b5780e43ac7a2143098441670ff0a49d5f9034c54a6c16", size = 469047, upload-time = "2025-07-17T16:51:58.613Z" }, ] +[[package]] +name = "dnspython" +version = "2.8.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/8c/8b/57666417c0f90f08bcafa776861060426765fdb422eb10212086fb811d26/dnspython-2.8.0.tar.gz", hash = "sha256:181d3c6996452cb1189c4046c61599b84a5a86e099562ffde77d26984ff26d0f", size = 368251, upload-time = "2025-09-07T18:58:00.022Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ba/5a/18ad964b0086c6e62e2e7500f7edc89e3faa45033c71c1893d34eed2b2de/dnspython-2.8.0-py3-none-any.whl", hash = "sha256:01d9bbc4a2d76bf0db7c1f729812ded6d912bd318d3b1cf81d30c0f845dbf3af", size = 331094, upload-time = "2025-09-07T18:57:58.071Z" }, +] + [[package]] name = "docspec" version = "2.2.1" @@ -759,6 +757,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f8/1a/25272fafd13c92a2e3b8e351127410b9ea5557324bfea3552388d65797fc/dycw_pytest_only-2.1.1-py3-none-any.whl", hash = "sha256:ea8fe48878dd95ad0ca804e549225cf3b7a1928eb188c22a284c1d17b48a7b89", size = 2413, upload-time = "2025-06-03T01:04:46.585Z" }, ] +[[package]] +name = "email-validator" +version = "2.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "dnspython" }, + { name = "idna" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f5/22/900cb125c76b7aaa450ce02fd727f452243f2e91a61af068b40adba60ea9/email_validator-2.3.0.tar.gz", hash = "sha256:9fc05c37f2f6cf439ff414f8fc46d917929974a82244c20eb10231ba60c54426", size = 51238, upload-time = "2025-08-26T13:09:06.831Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/de/15/545e2b6cf2e3be84bc1ed85613edd75b8aea69807a71c26f4ca6a9258e82/email_validator-2.3.0-py3-none-any.whl", hash = "sha256:80f13f623413e6b197ae73bb10bf4eb0908faf509ad8362c5edeb0be7fd450b4", size = 35604, upload-time = "2025-08-26T13:09:05.858Z" }, +] + [[package]] name = "execnet" version = "2.1.2" @@ -1673,6 +1684,11 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fd/7b/122376b1fd3c62c1ed9dc80c931ace4844b3c55407b6fb2d199377c9736f/pydantic-2.13.4-py3-none-any.whl", hash = "sha256:45a282cde31d808236fd7ea9d919b128653c8b38b393d1c4ab335c62924d9aba", size = 472262, upload-time = "2026-05-06T13:43:02.641Z" }, ] +[package.optional-dependencies] +email = [ + { name = "email-validator" }, +] + [[package]] name = "pydantic-core" version = "2.46.4"