Source code for airflow.providers.amazon.aws.operators.athena
## Licensed to the Apache Software Foundation (ASF) under one# or more contributor license agreements. See the NOTICE file# distributed with this work for additional information# regarding copyright ownership. The ASF licenses this file# to you under the Apache License, Version 2.0 (the# "License"); you may not use this file except in compliance# with the License. You may obtain a copy of the License at## http://www.apache.org/licenses/LICENSE-2.0## Unless required by applicable law or agreed to in writing,# software distributed under the License is distributed on an# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY# KIND, either express or implied. See the License for the# specific language governing permissions and limitations# under the License.#fromtypingimportAny,Dict,Optionalfromuuidimportuuid4try:fromfunctoolsimportcached_propertyexceptImportError:fromcached_propertyimportcached_propertyfromairflow.modelsimportBaseOperatorfromairflow.providers.amazon.aws.hooks.athenaimportAWSAthenaHook
class AWSAthenaOperator(BaseOperator):
    """
    An operator that submits a Presto query to Athena.

    :param query: Presto query to be run on Athena. (templated)
    :type query: str
    :param database: Database to select. (templated)
    :type database: str
    :param output_location: s3 path to write the query results into. (templated)
    :type output_location: str
    :param aws_conn_id: aws connection to use
    :type aws_conn_id: str
    :param client_request_token: Unique token created by user to avoid multiple executions of same query
    :type client_request_token: str
    :param workgroup: Athena workgroup in which query will be run
    :type workgroup: str
    :param query_execution_context: Context in which the query needs to be run
    :type query_execution_context: dict
    :param result_configuration: Dict with path to store results in and config related to encryption
    :type result_configuration: dict
    :param sleep_time: Time (in seconds) to wait between two consecutive calls to check query status on Athena
    :type sleep_time: int
    :param max_tries: Number of times to poll for query state before function exits
    :type max_tries: int
    """

    # NOTE(review): the methods below read attributes such as ``aws_conn_id``,
    # ``sleep_time``, ``query`` and ``query_execution_id`` from ``self``; the
    # constructor that assigns them is not visible in this excerpt -- confirm
    # it exists before relying on this class as shown.

    # ``execute`` and ``on_kill`` use ``self.hook`` as an attribute (not a
    # call), so the hook has to be exposed as a property; ``cached_property``
    # is already imported at the top of the file for this purpose.
    @cached_property
    def hook(self) -> AWSAthenaHook:
        """Create and return an AWSAthenaHook."""
        return AWSAthenaHook(self.aws_conn_id, sleep_time=self.sleep_time)

    def execute(self, context: dict) -> Optional[str]:
        """
        Run Presto Query on Athena

        Submits the query through the hook, polls for its state and raises
        when the state is a failure state or is still missing/intermediate
        after polling.

        :param context: task context passed by the caller; not read here
        :type context: dict
        :return: the query execution id returned by the hook's ``run_query``
        :raises Exception: if the polled state is in
            ``AWSAthenaHook.FAILURE_STATES``, or if it is empty or in
            ``AWSAthenaHook.INTERMEDIATE_STATES``
        """
        # The database and the output location are passed to the hook inside
        # the two configuration dicts, so they are written into them first.
        self.query_execution_context['Database'] = self.database
        self.result_configuration['OutputLocation'] = self.output_location

        self.query_execution_id = self.hook.run_query(
            self.query,
            self.query_execution_context,
            self.result_configuration,
            self.client_request_token,
            self.workgroup,
        )
        query_status = self.hook.poll_query_status(self.query_execution_id, self.max_tries)

        if query_status in AWSAthenaHook.FAILURE_STATES:
            error_message = self.hook.get_state_change_reason(self.query_execution_id)
            raise Exception(
                'Final state of Athena job is {}, query_execution_id is {}. Error: {}'.format(
                    query_status, self.query_execution_id, error_message
                )
            )

        # A falsy or intermediate state here means polling stopped before the
        # query reached a final state.
        if not query_status or query_status in AWSAthenaHook.INTERMEDIATE_STATES:
            raise Exception(
                'Final state of Athena job is {}. '
                'Max tries of poll status exceeded, query_execution_id is {}.'.format(
                    query_status, self.query_execution_id
                )
            )

        return self.query_execution_id

    def on_kill(self) -> None:
        """
        Cancel the submitted athena query

        Does nothing when no query execution id has been recorded. Otherwise
        asks the hook to stop the query and, if the stop request is answered
        with HTTP status 200, polls until the hook reports a state.
        Failures while requesting the stop are logged, not raised.
        """
        if not self.query_execution_id:
            return

        self.log.info('Received a kill signal.')
        self.log.info('Stopping Query with executionId - %s', self.query_execution_id)

        http_status_code = None
        try:
            # The stop request itself is inside the ``try`` so that a failing
            # cancel call is reported by the handler below instead of escaping
            # from the kill handler.
            response = self.hook.stop_query(self.query_execution_id)
            http_status_code = response['ResponseMetadata']['HTTPStatusCode']
        except Exception as ex:  # best-effort cleanup: log and carry on
            self.log.error('Exception while cancelling query: %s', ex)

        # ``None`` (no usable response) also compares unequal to 200.
        if http_status_code != 200:
            self.log.error('Unable to request query cancel on athena. Exiting')
            return

        self.log.info('Polling Athena for query with id %s to reach final state', self.query_execution_id)
        # No max_tries is passed here, unlike the poll in ``execute``.
        self.hook.poll_query_status(self.query_execution_id)