Source code for airflow.providers.amazon.aws.triggers.glue_crawler
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import annotations
import warnings
from airflow.exceptions import AirflowProviderDeprecationWarning
from airflow.providers.amazon.aws.hooks.base_aws import AwsGenericHook
from airflow.providers.amazon.aws.hooks.glue_crawler import GlueCrawlerHook
from airflow.providers.amazon.aws.triggers.base import AwsBaseWaiterTrigger
class GlueCrawlerCompleteTrigger(AwsBaseWaiterTrigger):
    """
    Trigger that waits on the ``crawler_ready`` waiter for a Glue crawler.

    All waiting logic lives in ``AwsBaseWaiterTrigger``; this class only supplies
    the Glue-specific waiter name, waiter arguments, messages and hook.

    :param crawler_name: Name of the crawler to watch.
    :param poll_interval: Deprecated, use ``waiter_delay`` instead. When given, a
        deprecation warning is emitted and any non-zero value overrides
        ``waiter_delay``.
    :param aws_conn_id: The Airflow connection used for AWS credentials.
    :param waiter_delay: The amount of time in seconds to wait between attempts.
    :param waiter_max_attempts: Forwarded unchanged to the base trigger
        (presumably the upper bound on polling attempts; confirm against
        ``AwsBaseWaiterTrigger``).
    """

    def __init__(
        self,
        crawler_name: str,
        poll_interval: int | None = None,
        aws_conn_id: str = "aws_default",
        waiter_delay: int = 5,
        waiter_max_attempts: int = 1500,
    ):
        legacy_arg_supplied = poll_interval is not None
        if legacy_arg_supplied:
            # stacklevel=2 attributes the warning to whoever instantiated the trigger.
            warnings.warn(
                "please use waiter_delay instead of poll_interval.",
                AirflowProviderDeprecationWarning,
                stacklevel=2,
            )

        # A falsy poll_interval (None or 0) leaves the waiter_delay argument in charge.
        effective_delay = poll_interval if poll_interval else waiter_delay

        super().__init__(
            aws_conn_id=aws_conn_id,
            waiter_name="crawler_ready",
            waiter_args={"Name": crawler_name},
            waiter_delay=effective_delay,
            waiter_max_attempts=waiter_max_attempts,
            serialized_fields={"crawler_name": crawler_name},
            status_queries=["Crawler.State", "Crawler.LastCrawl"],
            status_message="Status of glue crawl is",
            failure_message="Error while waiting for glue crawl to complete",
            return_value=None,
        )

    def hook(self) -> AwsGenericHook:
        """Return a ``GlueCrawlerHook`` bound to this trigger's ``aws_conn_id``."""
        crawler_hook = GlueCrawlerHook(aws_conn_id=self.aws_conn_id)
        return crawler_hook