Source code for airflow.providers.amazon.aws.sensors.emr
## Licensed to the Apache Software Foundation (ASF) under one# or more contributor license agreements. See the NOTICE file# distributed with this work for additional information# regarding copyright ownership. The ASF licenses this file# to you under the Apache License, Version 2.0 (the# "License"); you may not use this file except in compliance# with the License. You may obtain a copy of the License at## http://www.apache.org/licenses/LICENSE-2.0## Unless required by applicable law or agreed to in writing,# software distributed under the License is distributed on an# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY# KIND, either express or implied. See the License for the# specific language governing permissions and limitations# under the License.importsysfromtypingimportTYPE_CHECKING,Any,Dict,Iterable,Optional,SequenceifTYPE_CHECKING:fromairflow.utils.contextimportContextifsys.version_info>=(3,8):fromfunctoolsimportcached_propertyelse:fromcached_propertyimportcached_propertyfromairflow.exceptionsimportAirflowExceptionfromairflow.providers.amazon.aws.hooks.emrimportEmrContainerHook,EmrHookfromairflow.sensors.baseimportBaseSensorOperator
class EmrBaseSensor(BaseSensorOperator):
    """
    Shared polling behavior for the EMR sensors.

    Concrete sensors are expected to override the following methods:

    - ``get_emr_response()``
    - ``state_from_response()``
    - ``failure_message_from_response()``

    They should also assign the ``target_states`` and ``failed_states``
    attributes, which this base class initializes to ``None``.

    :param aws_conn_id: aws connection to use
    :type aws_conn_id: str
    """

    def __init__(self, *, aws_conn_id: str = 'aws_default', **kwargs):
        super().__init__(**kwargs)
        self.aws_conn_id = aws_conn_id
        # Both state collections start out unset; subclasses fill them in.
        self.target_states: Optional[Iterable[str]] = None
        self.failed_states: Optional[Iterable[str]] = None
        self.hook: Optional[EmrHook] = None

    def poke(self, context: 'Context'):
        """
        Fetch the current EMR response and decide whether the sensor is done.

        :param context: task context (not used by this implementation)
        :return: ``True`` when the reported state is one of ``target_states``;
            ``False`` when the HTTP status code is not 200 or the state is
            neither a target nor a failed state
        :rtype: bool
        :raises AirflowException: when the reported state is one of ``failed_states``
        """
        response = self.get_emr_response()

        status_code = response['ResponseMetadata']['HTTPStatusCode']
        if status_code != 200:
            self.log.info('Bad HTTP response: %s', response)
            return False

        state = self.state_from_response(response)
        self.log.info('Job flow currently %s', state)

        # Target states are checked first, so they win over failed states.
        if state in self.target_states:
            return True
        if state not in self.failed_states:
            return False

        # Failed state: append the subclass-provided detail when there is one.
        message_parts = ['EMR job failed']
        detail = self.failure_message_from_response(response)
        if detail:
            message_parts.append(detail)
        raise AirflowException(' '.join(message_parts))

    def get_emr_response(self) -> Dict[str, Any]:
        """
        Make an API call with boto3 and return its response.

        Must be overridden; this base implementation always raises.

        :return: response
        :rtype: dict[str, Any]
        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError('Please implement get_emr_response() in subclass')

    @staticmethod
    def state_from_response(response: Dict[str, Any]) -> str:
        """
        Extract the state from a response dictionary.

        Must be overridden; this base implementation always raises.

        :param response: response from AWS API
        :type response: dict[str, Any]
        :return: state
        :rtype: str
        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError('Please implement state_from_response() in subclass')

    @staticmethod
    def failure_message_from_response(response: Dict[str, Any]) -> Optional[str]:
        """
        Extract the failure message from a response dictionary.

        Must be overridden; this base implementation always raises.

        :param response: response from AWS API
        :type response: dict[str, Any]
        :return: failure message
        :rtype: Optional[str]
        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError('Please implement failure_message_from_response() in subclass')
class EmrContainerSensor(BaseSensorOperator):
    """
    Asks for the state of the job run until it reaches a failure state or success state.
    If the job run fails, the task will fail.

    :param job_id: job_id to check the state of
    :type job_id: str
    :param max_retries: Number of times to poll for the state before
        returning the current state, defaults to None
    :type max_retries: int
    :param aws_conn_id: aws connection to use, defaults to 'aws_default'
    :type aws_conn_id: str
    :param poll_interval: Time in seconds to wait between two consecutive calls
        to check the status, defaults to 10
    :type poll_interval: int
    """

    # NOTE(review): ``cached_property`` is imported at module level but is not
    # applied to this accessor as shown, so each call builds a new hook -
    # confirm whether caching was intended.
    def hook(self) -> EmrContainerHook:
        """Create and return an EmrContainerHook"""
        container_hook = EmrContainerHook(
            self.aws_conn_id,
            virtual_cluster_id=self.virtual_cluster_id,
        )
        return container_hook
class EmrJobFlowSensor(EmrBaseSensor):
    """
    Asks for the state of the EMR JobFlow (Cluster) until it reaches
    any of the target states.
    If it fails, the sensor errors, failing the task.

    With the default target states, the sensor waits for the cluster to be terminated.
    When target_states is set to ['RUNNING', 'WAITING'], the sensor waits
    until the job flow is ready (after the 'STARTING' and 'BOOTSTRAPPING' states).

    :param job_flow_id: job_flow_id to check the state of
    :type job_flow_id: str
    :param target_states: the target states, sensor waits until
        job flow reaches any of these states
    :type target_states: list[str]
    :param failed_states: the failure states, sensor fails when
        job flow reaches any of these states
    :type failed_states: list[str]
    """

    def get_emr_response(self) -> Dict[str, Any]:
        """
        Make an API call with boto3 and get cluster-level details.

        .. seealso::
            https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/emr.html#EMR.Client.describe_cluster

        :return: response
        :rtype: dict[str, Any]
        """
        # NOTE(review): relies on ``self.get_hook()`` and ``self.job_flow_id``,
        # neither of which is defined in this class - confirm they are
        # provided elsewhere.
        client = self.get_hook().get_conn()
        cluster_id = self.job_flow_id
        self.log.info('Poking cluster %s', cluster_id)
        return client.describe_cluster(ClusterId=cluster_id)

    @staticmethod
    def state_from_response(response: Dict[str, Any]) -> str:
        """
        Read the cluster state out of a ``describe_cluster`` style response.

        :param response: response from AWS API
        :type response: dict[str, Any]
        :return: current state of the cluster
        :rtype: str
        """
        cluster_status = response['Cluster']['Status']
        return cluster_status['State']

    @staticmethod
    def failure_message_from_response(response: Dict[str, Any]) -> Optional[str]:
        """
        Build a failure message from the cluster's state change reason.

        :param response: response from AWS API
        :type response: dict[str, Any]
        :return: failure message, or ``None`` when the response carries no
            (non-empty) ``StateChangeReason``
        :rtype: Optional[str]
        """
        reason = response['Cluster']['Status'].get('StateChangeReason')
        if not reason:
            return None

        # Missing fields fall back to placeholder text rather than ``None``.
        code = reason.get('Code', 'No code')
        message = reason.get('Message', 'Unknown')
        return 'for code: {} with message {}'.format(code, message)
class EmrStepSensor(EmrBaseSensor):
    """
    Asks for the state of the step until it reaches any of the target states.
    If it fails, the sensor errors, failing the task.

    With the default target states, the sensor waits for the step to be completed.

    :param job_flow_id: job_flow_id which contains the step to check the state of
    :type job_flow_id: str
    :param step_id: step to check the state of
    :type step_id: str
    :param target_states: the target states, sensor waits until
        step reaches any of these states
    :type target_states: list[str]
    :param failed_states: the failure states, sensor fails when
        step reaches any of these states
    :type failed_states: list[str]
    """

    def get_emr_response(self) -> Dict[str, Any]:
        """
        Make an API call with boto3 and get details about the cluster step.

        .. seealso::
            https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/emr.html#EMR.Client.describe_step

        :return: response
        :rtype: dict[str, Any]
        """
        # NOTE(review): relies on ``self.get_hook()``, ``self.job_flow_id`` and
        # ``self.step_id``, none of which is defined in this class - confirm
        # they are provided elsewhere.
        client = self.get_hook().get_conn()
        step_id = self.step_id
        cluster_id = self.job_flow_id
        self.log.info('Poking step %s on cluster %s', step_id, cluster_id)
        return client.describe_step(ClusterId=cluster_id, StepId=step_id)

    @staticmethod
    def state_from_response(response: Dict[str, Any]) -> str:
        """
        Read the step state out of a ``describe_step`` style response.

        :param response: response from AWS API
        :type response: dict[str, Any]
        :return: execution state of the cluster step
        :rtype: str
        """
        step_status = response['Step']['Status']
        return step_status['State']

    @staticmethod
    def failure_message_from_response(response: Dict[str, Any]) -> Optional[str]:
        """
        Build a failure message from the step's failure details.

        :param response: response from AWS API
        :type response: dict[str, Any]
        :return: failure message, or ``None`` when the response carries no
            (non-empty) ``FailureDetails``
        :rtype: Optional[str]
        """
        details = response['Step']['Status'].get('FailureDetails')
        if not details:
            return None

        # Absent fields are rendered as ``None`` in the message.
        reason = details.get('Reason')
        message = details.get('Message')
        log_file = details.get('LogFile')
        return 'for reason {} with message {} and log file {}'.format(reason, message, log_file)