# Source code for airflow.providers.amazon.aws.triggers.glue_crawler
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import annotations
import warnings
from airflow.exceptions import AirflowProviderDeprecationWarning
from airflow.providers.amazon.aws.hooks.base_aws import AwsGenericHook
from airflow.providers.amazon.aws.hooks.glue_crawler import GlueCrawlerHook
from airflow.providers.amazon.aws.triggers.base import AwsBaseWaiterTrigger
class GlueCrawlerCompleteTrigger(AwsBaseWaiterTrigger):
    """
    Watches for a glue crawl, triggers when it finishes.

    :param crawler_name: name of the crawler to watch
    :param poll_interval: Deprecated, use ``waiter_delay`` instead. When given
        (and non-zero) it overrides ``waiter_delay``; passing any value other
        than ``None`` emits an ``AirflowProviderDeprecationWarning``.
    :param aws_conn_id: The Airflow connection used for AWS credentials.
    :param waiter_delay: The amount of time in seconds to wait between attempts.
        (default: 5)
    :param waiter_max_attempts: The maximum number of attempts to be made.
        (default: 1500)
    """

    def __init__(
        self,
        crawler_name: str,
        poll_interval: int | None = None,
        aws_conn_id: str = "aws_default",
        waiter_delay: int = 5,
        waiter_max_attempts: int = 1500,
    ):
        if poll_interval is not None:
            # stacklevel=2 attributes the warning to the code that instantiated
            # the trigger rather than to this constructor.
            warnings.warn(
                "please use waiter_delay instead of poll_interval.",
                AirflowProviderDeprecationWarning,
                stacklevel=2,
            )
            # The legacy argument wins over waiter_delay; a falsy value
            # (e.g. 0) falls back to waiter_delay.
            waiter_delay = poll_interval or waiter_delay

        super().__init__(
            # Only the crawler name is listed here as a serialized field;
            # the remaining arguments are forwarded to the base trigger as-is.
            serialized_fields={"crawler_name": crawler_name},
            waiter_name="crawler_ready",
            waiter_args={"Name": crawler_name},
            failure_message="Error while waiting for glue crawl to complete",
            status_message="Status of glue crawl is",
            status_queries=["Crawler.State", "Crawler.LastCrawl"],
            return_value=None,
            waiter_delay=waiter_delay,
            waiter_max_attempts=waiter_max_attempts,
            aws_conn_id=aws_conn_id,
        )

    def hook(self) -> AwsGenericHook:
        """Return a ``GlueCrawlerHook`` created with this trigger's ``aws_conn_id``."""
        return GlueCrawlerHook(aws_conn_id=self.aws_conn_id)