Source code for airflow.timetables.trigger
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import annotations

import datetime
from typing import TYPE_CHECKING, Any

from airflow.timetables._cron import CronMixin
from airflow.timetables._delta import DeltaMixin
from airflow.timetables.base import DagRunInfo, DataInterval, Timetable
from airflow.utils.timezone import coerce_datetime, utcnow

if TYPE_CHECKING:
    from dateutil.relativedelta import relativedelta
    from pendulum import DateTime
    from pendulum.tz.timezone import FixedTimezone, Timezone

    from airflow.timetables.base import TimeRestriction


def _serialize_interval(interval: datetime.timedelta | relativedelta) -> float | dict:
    from airflow.serialization.serialized_objects import encode_relativedelta

    if isinstance(interval, datetime.timedelta):
        return interval.total_seconds()
    return encode_relativedelta(interval)


def _deserialize_interval(value: int | dict) -> datetime.timedelta | relativedelta:
    from airflow.serialization.serialized_objects import decode_relativedelta

    if isinstance(value, dict):
        return decode_relativedelta(value)
    return datetime.timedelta(seconds=value)
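

# A minimal sketch of how the helpers above round-trip an interval (assumes an
# environment where airflow is importable; the exact dict layout produced by
# encode_relativedelta is an implementation detail and not spelled out here):
#
#     import datetime
#     from dateutil.relativedelta import relativedelta
#
#     _serialize_interval(datetime.timedelta(hours=2))   # 7200.0
#     _deserialize_interval(7200)                        # datetime.timedelta(seconds=7200)
#     _deserialize_interval(_serialize_interval(relativedelta(months=1)))
#     # -> an equivalent relativedelta(months=+1)
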
class _TriggerTimetable(Timetable):
    _interval: datetime.timedelta | relativedelta

    def infer_manual_data_interval(self, *, run_after: DateTime) -> DataInterval:
        # A manually triggered run always ends its data interval at ``run_after``
        # and extends ``self._interval`` backwards from it (zero-length by default).
        return DataInterval(
            coerce_datetime(run_after - self._interval),
            run_after,
        )

    # The four hooks below define how run times align to the schedule grid and
    # how to step between runs; concrete implementations come from the mixin
    # (``CronMixin`` or ``DeltaMixin``) used by the subclasses further down.

    def _align_to_next(self, current: DateTime) -> DateTime:
        raise NotImplementedError()

    def _align_to_prev(self, current: DateTime) -> DateTime:
        raise NotImplementedError()

    def _get_next(self, current: DateTime) -> DateTime:
        raise NotImplementedError()

    def _get_prev(self, current: DateTime) -> DateTime:
        raise NotImplementedError()

    def next_dagrun_info(
        self,
        *,
        last_automated_data_interval: DataInterval | None,
        restriction: TimeRestriction,
    ) -> DagRunInfo | None:
        if restriction.catchup:
            if last_automated_data_interval is not None:
                next_start_time = self._get_next(last_automated_data_interval.end)
            elif restriction.earliest is None:
                return None  # Don't know where to catch up from, give up.
            else:
                next_start_time = self._align_to_next(restriction.earliest)
        else:
            start_time_candidates = [self._align_to_prev(coerce_datetime(utcnow()))]
            if last_automated_data_interval is not None:
                start_time_candidates.append(self._get_next(last_automated_data_interval.end))
            if restriction.earliest is not None:
                start_time_candidates.append(self._align_to_next(restriction.earliest))
            next_start_time = max(start_time_candidates)
        if restriction.latest is not None and restriction.latest < next_start_time:
            return None
        return DagRunInfo.interval(
            # pendulum.DateTime ± timedelta should return pendulum.DateTime,
            # but mypy decides the result is datetime.datetime.
            next_start_time - self._interval,  # type: ignore[arg-type]
            next_start_time,
        )
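

# A worked example of next_dagrun_info (values are illustrative only): with an
# hourly cron trigger such as "0 * * * *", earliest = 06:30, no previous run,
# and the current time 09:40:
#
#   * catchup=True:  the first run is aligned forward from ``earliest`` and
#     fires at 07:00; subsequent calls step one period at a time (08:00,
#     09:00, ...) until the schedule has caught up.
#   * catchup=False: the candidates are "now aligned backwards" (09:00) and
#     the aligned ``earliest`` (07:00); the max wins, so the next run fires at
#     09:00 and the missed periods are skipped.
#
# In both cases ``restriction.latest`` is a hard cut-off, and each run's data
# interval is ``[next_start_time - self._interval, next_start_time]``.
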
class DeltaTriggerTimetable(DeltaMixin, _TriggerTimetable):
"""
Timetable that triggers DAG runs according to a cron expression.
This is different from ``DeltaDataIntervalTimetable``, where the delta value
specifies the *data interval* of a DAG run. With this timetable, the data
intervals are specified independently. Also for the same reason, this
timetable kicks off a DAG run immediately at the start of the period,
instead of needing to wait for one data interval to pass.
:param delta: How much time to wait between each run.
:param interval: The data interval of each run. Default is 0.
"""

    def __init__(
        self,
        delta: datetime.timedelta | relativedelta,
        *,
        interval: datetime.timedelta | relativedelta = datetime.timedelta(),
    ) -> None:
        super().__init__(delta)
        self._interval = interval

    @classmethod
    def deserialize(cls, data: dict[str, Any]) -> Timetable:
        return cls(
            _deserialize_interval(data["delta"]),
            interval=_deserialize_interval(data["interval"]),
        )

    def serialize(self) -> dict[str, Any]:
        return {
            "delta": _serialize_interval(self._delta),
            "interval": _serialize_interval(self._interval),
        }
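

# A hedged usage sketch for DeltaTriggerTimetable (the DAG id and dates below
# are made up for illustration): the timetable is passed to a DAG's
# ``schedule`` argument like any other timetable.
#
#     import datetime
#     from airflow.sdk import DAG  # ``from airflow import DAG`` on Airflow 2.x
#     from airflow.timetables.trigger import DeltaTriggerTimetable
#
#     with DAG(
#         dag_id="every_six_hours",
#         start_date=datetime.datetime(2025, 1, 1),
#         schedule=DeltaTriggerTimetable(datetime.timedelta(hours=6)),
#     ):
#         ...
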
class CronTriggerTimetable(CronMixin, _TriggerTimetable):
"""
Timetable that triggers DAG runs according to a cron expression.
This is different from ``CronDataIntervalTimetable``, where the cron
expression specifies the *data interval* of a DAG run. With this timetable,
the data intervals are specified independently from the cron expression.
Also for the same reason, this timetable kicks off a DAG run immediately at
the start of the period (similar to POSIX cron), instead of needing to wait
for one data interval to pass.
Don't pass ``@once`` in here; use ``OnceTimetable`` instead.
"""

    def __init__(
        self,
        cron: str,
        *,
        timezone: str | Timezone | FixedTimezone,
        interval: datetime.timedelta | relativedelta = datetime.timedelta(),
    ) -> None:
        super().__init__(cron, timezone)
        self._interval = interval

    @classmethod
    def deserialize(cls, data: dict[str, Any]) -> Timetable:
        from airflow.serialization.serialized_objects import decode_relativedelta, decode_timezone

        interval: datetime.timedelta | relativedelta
        if isinstance(data["interval"], dict):
            interval = decode_relativedelta(data["interval"])
        else:
            interval = datetime.timedelta(seconds=data["interval"])
        return cls(data["expression"], timezone=decode_timezone(data["timezone"]), interval=interval)

    def serialize(self) -> dict[str, Any]:
        from airflow.serialization.serialized_objects import encode_timezone

        return {
            "expression": self._expression,
            "timezone": encode_timezone(self._timezone),
            "interval": _serialize_interval(self._interval),
        }
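

# A hedged usage sketch for CronTriggerTimetable (DAG id and dates are made
# up): trigger at 09:00 Amsterdam time on weekdays, with each run covering the
# preceding hour of data via ``interval``.
#
#     import datetime
#     from airflow.sdk import DAG  # ``from airflow import DAG`` on Airflow 2.x
#     from airflow.timetables.trigger import CronTriggerTimetable
#
#     with DAG(
#         dag_id="weekday_morning_report",
#         start_date=datetime.datetime(2025, 1, 1),
#         schedule=CronTriggerTimetable(
#             "0 9 * * 1-5",
#             timezone="Europe/Amsterdam",
#             interval=datetime.timedelta(hours=1),
#         ),
#     ):
#         ...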