Source code for airflow.providers.amazon.aws.hooks.appflow

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import annotations

from typing import TYPE_CHECKING

from airflow.providers.amazon.aws.hooks.base_aws import AwsGenericHook
from airflow.providers.amazon.aws.utils.waiter_with_logging import wait

if TYPE_CHECKING:
    from mypy_boto3_appflow.client import AppflowClient  # noqa


[docs]class AppflowHook(AwsGenericHook["AppflowClient"]):
    """
    Interact with Amazon AppFlow.

    Provide thin wrapper around :external+boto3:py:class:`boto3.client("appflow") <Appflow.Client>`.

    Additional arguments (such as ``aws_conn_id``) may be specified and
    are passed down to the underlying AwsBaseHook.

    .. seealso::
        - :class:`airflow.providers.amazon.aws.hooks.base_aws.AwsBaseHook`
        - `Amazon Appflow API Reference <https://docs.aws.amazon.com/appflow/1.0/APIReference/Welcome.html>`__
    """

    def __init__(self, *args, **kwargs) -> None:
        kwargs["client_type"] = "appflow"
        super().__init__(*args, **kwargs)

[docs]    def run_flow(
        self,
        flow_name: str,
        poll_interval: int = 20,
        wait_for_completion: bool = True,
        max_attempts: int = 60,
    ) -> str:
        """
        Execute an AppFlow run.

        :param flow_name: The flow name
        :param poll_interval: Time (seconds) to wait between two consecutive calls to check the run status
        :param wait_for_completion: whether to wait for the run to end to return
        :param max_attempts: the number of polls to do before timing out/returning a failure.
        :return: The run execution ID
        """
        response_start = self.conn.start_flow(flowName=flow_name)
        execution_id = response_start["executionId"]
        self.log.info("executionId: %s", execution_id)

        if wait_for_completion:
            wait(
                waiter=self.get_waiter("run_complete", {"EXECUTION_ID": execution_id}),
                waiter_delay=poll_interval,
                waiter_max_attempts=max_attempts,
                args={"flowName": flow_name},
                failure_message="error while waiting for flow to complete",
                status_message="waiting for flow completion, status",
                status_args=[
                    f"flowExecutions[?executionId=='{execution_id}'].executionStatus",
                    f"flowExecutions[?executionId=='{execution_id}'].executionResult.errorInfo",
                ],
            )
            self._log_execution_description(flow_name, execution_id)

        return execution_id

    def _log_execution_description(self, flow_name: str, execution_id: str):
        response_desc = self.conn.describe_flow_execution_records(flowName=flow_name)
        last_execs = {fe["executionId"]: fe for fe in response_desc["flowExecutions"]}
        exec_details = last_execs[execution_id]
        self.log.info("Run complete, execution details: %s", exec_details)

[docs]    def update_flow_filter(self, flow_name: str, filter_tasks, set_trigger_ondemand: bool = False) -> None:
        """
        Update the flow task filter; all filters will be removed if an empty array is passed to filter_tasks.

        :param flow_name: The flow name
        :param filter_tasks: List flow tasks to be added
        :param set_trigger_ondemand: If True, set the trigger to on-demand; otherwise, keep the trigger as is
        :return: None
        """
        response = self.conn.describe_flow(flowName=flow_name)
        connector_type = response["sourceFlowConfig"]["connectorType"]
        tasks = []

        # cleanup old filter tasks
        for task in response["tasks"]:
            if (
                task["taskType"] == "Filter"
                and task.get("connectorOperator", {}).get(connector_type) != "PROJECTION"
            ):
                self.log.info("Removing task: %s", task)
            else:
                tasks.append(task)  # List of non-filter tasks

        tasks += filter_tasks  # Add the new filter tasks

        if set_trigger_ondemand:
            # Clean up attribute to force on-demand trigger
            del response["triggerConfig"]["triggerProperties"]

        self.conn.update_flow(
            flowName=response["flowName"],
            destinationFlowConfigList=response["destinationFlowConfigList"],
            sourceFlowConfig=response["sourceFlowConfig"],
            triggerConfig=response["triggerConfig"],
            description=response.get("description", "Flow description."),
            tasks=tasks,
        )