diff --git a/src/crawlee/_utils/models.py b/src/crawlee/_utils/models.py
index 078e9f01d8..678a47250b 100644
--- a/src/crawlee/_utils/models.py
+++ b/src/crawlee/_utils/models.py
@@ -17,6 +17,15 @@ def _timedelta_to_ms(td: timedelta | None) -> float | None:
     return int(round(td.total_seconds() * 1000))
 
 
+def _timedelta_to_secs(td: timedelta | None) -> float | None:
+    """Convert a timedelta to seconds, mapping `timedelta.max` to infinity and passing `None` through."""
+    if td == timedelta.max:
+        return float('inf')
+    if td is None:
+        return td
+    return td.total_seconds()
+
+
 _number_parser = TypeAdapter(float)
 
 
@@ -35,4 +44,28 @@ def _timedelta_from_ms(value: float | timedelta | Any | None, handler: Callable[
     return timedelta(milliseconds=value)
 
 
+def _timedelta_from_secs(
+    value: float | timedelta | Any | None,
+    handler: Callable[[Any], timedelta],
+) -> timedelta | None:
+    """Parse a number of seconds into a timedelta, mapping infinity to `timedelta.max`.
+
+    Values that are not numeric after string decoding are delegated to `handler`.
+    """
+    # Decode string-encoded numbers first so that a string-encoded infinity is recognized below
+    if isinstance(value, str):
+        with suppress(ValidationError):
+            value = _number_parser.validate_python(value)
+
+    # `timedelta(seconds=inf)` raises OverflowError, so infinity has to be mapped explicitly
+    if value == float('inf'):
+        return timedelta.max
+
+    if not isinstance(value, (int, float)):
+        return handler(value)
+
+    return timedelta(seconds=value)
+
+
 timedelta_ms = Annotated[timedelta, PlainSerializer(_timedelta_to_ms), WrapValidator(_timedelta_from_ms)]
+timedelta_secs = Annotated[timedelta, PlainSerializer(_timedelta_to_secs), WrapValidator(_timedelta_from_secs)]
diff --git a/src/crawlee/events/_types.py b/src/crawlee/events/_types.py
index 22c571c58d..aaa59f4b54 100644
--- a/src/crawlee/events/_types.py
+++ b/src/crawlee/events/_types.py
@@ -7,6 +7,7 @@
 from pydantic import BaseModel, ConfigDict, Field
 
 from crawlee._utils.docs import docs_group
+from crawlee._utils.models import timedelta_secs
 from crawlee._utils.system import CpuInfo, MemoryUsageInfo
 
 
@@ -59,6 +60,10 @@ class EventMigratingData(BaseModel):
     model_config = ConfigDict(populate_by_name=True)
 
+    # The remaining time in seconds before the migration is forced and the process is killed
+    # Optional because it's not present when the event handler is called manually
+    time_remaining: Annotated[timedelta_secs | None, Field(alias='timeRemainingSecs')] = None
+
 
 
 @docs_group('Event payloads')
 class EventAbortingData(BaseModel):