Source code for airflow.providers.amazon.aws.triggers.glue
# Licensed to the Apache Software Foundation (ASF) under one# or more contributor license agreements. See the NOTICE file# distributed with this work for additional information# regarding copyright ownership. The ASF licenses this file# to you under the Apache License, Version 2.0 (the# "License"); you may not use this file except in compliance# with the License. You may obtain a copy of the License at## http://www.apache.org/licenses/LICENSE-2.0## Unless required by applicable law or agreed to in writing,# software distributed under the License is distributed on an# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY# KIND, either express or implied. See the License for the# specific language governing permissions and limitations# under the License.from__future__importannotationsimportasynciofromfunctoolsimportcached_propertyfromtypingimportAny,AsyncIteratorfromairflow.providers.amazon.aws.hooks.glueimportGlueJobHookfromairflow.providers.amazon.aws.hooks.glue_catalogimportGlueCatalogHookfromairflow.triggers.baseimportBaseTrigger,TriggerEvent
class GlueJobCompleteTrigger(BaseTrigger):
    """
    Watches for a glue job, triggers when it finishes.

    :param job_name: glue job name
    :param run_id: the ID of the specific run to watch for that job
    :param verbose: whether to print the job's logs in airflow logs or not
    :param aws_conn_id: The Airflow connection used for AWS credentials.
    :param job_poll_interval: polling interval stored on the trigger and carried through
        serialization. NOTE(review): presumably the delay, in seconds, between two job
        status checks -- confirm against the code that consumes it.
    """

    def __init__(
        self,
        job_name: str,
        run_id: str,
        verbose: bool,
        aws_conn_id: str | None,
        job_poll_interval: int | float,
    ):
        super().__init__()
        self.job_name = job_name
        self.run_id = run_id
        self.verbose = verbose
        self.aws_conn_id = aws_conn_id
        self.job_poll_interval = job_poll_interval

    def serialize(self) -> tuple[str, dict[str, Any]]:
        """
        Return the information needed to rebuild this trigger.

        :return: a ``(classpath, kwargs)`` tuple, where ``classpath`` is the fully
            qualified name of this class and ``kwargs`` mirrors the ``__init__`` arguments.
        """
        return (
            # dynamically generate the fully qualified name of the class
            self.__class__.__module__ + "." + self.__class__.__qualname__,
            {
                "job_name": self.job_name,
                "run_id": self.run_id,
                # Serialize the flag as-is: ``str(False)`` is the non-empty (truthy)
                # string "False", which would silently enable verbose mode once the
                # kwargs are passed back to ``__init__``.
                "verbose": self.verbose,
                "aws_conn_id": self.aws_conn_id,
                "job_poll_interval": self.job_poll_interval,
            },
        )
class GlueCatalogPartitionTrigger(BaseTrigger):
    """
    Asynchronously waits for a partition to show up in AWS Glue Catalog.

    :param database_name: The name of the catalog database where the partitions reside.
    :param table_name: The name of the table to wait for, supports the dot
        notation (my_database.my_table)
    :param expression: The partition clause to wait for. This is passed as
        is to the AWS Glue Catalog API's get_partitions function,
        and supports SQL like notation as in ``ds='2015-01-01'
        AND type='value'`` and comparison operators as in ``"ds>=2015-01-01"``.
        See https://docs.aws.amazon.com/glue/latest/dg/aws-glue-api-catalog-partitions.html
        #aws-glue-api-catalog-partitions-GetPartitions
    :param aws_conn_id: ID of the Airflow connection where
        credentials and extra configuration are stored
    :param region_name: Optional aws region name (example: us-east-1). Uses region from connection
        if not specified.
    :param waiter_delay: Number of seconds to wait between two checks. Default is 60 seconds.
    """

    def __init__(
        self,
        database_name: str,
        table_name: str,
        expression: str = "",
        aws_conn_id: str | None = "aws_default",
        region_name: str | None = None,
        waiter_delay: int = 60,
    ):
        # Run the base-class initialisation, as the other trigger in this module does,
        # so whatever state BaseTrigger sets up exists on this instance too.
        super().__init__()
        self.database_name = database_name
        self.table_name = table_name
        self.expression = expression
        self.aws_conn_id = aws_conn_id
        self.region_name = region_name
        self.waiter_delay = waiter_delay

    def serialize(self) -> tuple[str, dict[str, Any]]:
        """
        Return the information needed to rebuild this trigger.

        :return: a ``(classpath, kwargs)`` tuple, where ``classpath`` is the fully
            qualified name of this class and ``kwargs`` mirrors the ``__init__`` arguments.
        """
        return (
            # dynamically generate the fully qualified name of the class
            self.__class__.__module__ + "." + self.__class__.__qualname__,
            {
                "database_name": self.database_name,
                "table_name": self.table_name,
                "expression": self.expression,
                "aws_conn_id": self.aws_conn_id,
                "region_name": self.region_name,
                "waiter_delay": self.waiter_delay,
            },
        )