Source code for tests.system.amazon.aws.example_athena
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import annotations

from datetime import datetime

import boto3

from airflow.decorators import task
from airflow.models.baseoperator import chain
from airflow.models.dag import DAG
from airflow.providers.amazon.aws.hooks.s3 import S3Hook
from airflow.providers.amazon.aws.operators.athena import AthenaOperator
from airflow.providers.amazon.aws.operators.s3 import (
    S3CreateBucketOperator,
    S3CreateObjectOperator,
    S3DeleteBucketOperator,
)
from airflow.providers.amazon.aws.sensors.athena import AthenaSensor
from airflow.utils.trigger_rule import TriggerRule

from system.amazon.aws.utils import SystemTestContextBuilder
@task
def await_bucket(bucket_name):
    """Wait until the S3 bucket ``bucket_name`` is reported as existing.

    Avoids a race condition after creating the S3 bucket: later steps that
    write to the bucket should not start before it is visible.

    The function is registered as an Airflow task because the DAG wiring
    passes ``await_bucket(s3_bucket)`` to ``chain(...)`` as one of its
    elements; it must therefore yield a task rather than run at parse time.

    :param bucket_name: Name of the S3 bucket to wait for.
    """
    client = boto3.client("s3")
    # The "bucket_exists" waiter blocks until the bucket is found; see the
    # boto3 waiter documentation for its polling and timeout behaviour.
    waiter = client.get_waiter("bucket_exists")
    waiter.wait(Bucket=bucket_name)
# NOTE(review): the statements below use `test_context`, `SAMPLE_DATA`,
# `SAMPLE_FILENAME`, `read_results_from_s3` and `dag`, none of which are defined
# in the visible part of this file. Presumably they run inside a
# `with DAG(...) as dag:` block - confirm indentation and scope when merging.
env_id = test_context["ENV_ID"]

# All resource names are derived from the environment id.
s3_bucket = f"{env_id}-athena-bucket"
athena_table = f"{env_id}_test_table"
athena_database = f"{env_id}_default"

query_create_database = f"CREATE DATABASE IF NOT EXISTS {athena_database}"
# LOCATION uses a single "/" separator and a trailing slash so that it points at
# the same "<table>/" prefix that `upload_sample_data` writes to; the previous
# "s3://<bucket>//<table>" form contained an empty path segment.
query_create_table = (
    f"CREATE EXTERNAL TABLE IF NOT EXISTS {athena_database}.{athena_table} "
    "( `name` string, `age` int ) "
    'ROW FORMAT SERDE "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe" '
    'WITH SERDEPROPERTIES ( "serialization.format" = ",", "field.delim" = "," ) '
    f'LOCATION "s3://{s3_bucket}/{athena_table}/" '
    'TBLPROPERTIES ("has_encrypted_data"="false") '
)
query_read_table = f"SELECT * from {athena_database}.{athena_table}"
query_drop_table = f"DROP TABLE IF EXISTS {athena_database}.{athena_table}"
query_drop_database = f"DROP DATABASE IF EXISTS {athena_database}"

create_s3_bucket = S3CreateBucketOperator(task_id="create_s3_bucket", bucket_name=s3_bucket)

# The sample file is stored under the table's prefix in the bucket.
upload_sample_data = S3CreateObjectOperator(
    task_id="upload_sample_data",
    s3_bucket=s3_bucket,
    s3_key=f"{athena_table}/{SAMPLE_FILENAME}",
    data=SAMPLE_DATA,
    replace=True,
)

create_database = AthenaOperator(
    task_id="create_database",
    query=query_create_database,
    database=athena_database,
    output_location=f"s3://{s3_bucket}/",
    sleep_time=1,
)

create_table = AthenaOperator(
    task_id="create_table",
    query=query_create_table,
    database=athena_database,
    output_location=f"s3://{s3_bucket}/",
    sleep_time=1,
)

# [START howto_operator_athena]
read_table = AthenaOperator(
    task_id="read_table",
    query=query_read_table,
    database=athena_database,
    output_location=f"s3://{s3_bucket}/",
)
# [END howto_operator_athena]
# Set after construction so the snippet between the START/END markers above
# does not include it.
read_table.sleep_time = 1

# [START howto_sensor_athena]
await_query = AthenaSensor(
    task_id="await_query",
    query_execution_id=read_table.output,
)
# [END howto_sensor_athena]

# Teardown tasks use ALL_DONE so they run even when an upstream task failed.
drop_table = AthenaOperator(
    task_id="drop_table",
    query=query_drop_table,
    database=athena_database,
    output_location=f"s3://{s3_bucket}/",
    trigger_rule=TriggerRule.ALL_DONE,
    sleep_time=1,
)

drop_database = AthenaOperator(
    task_id="drop_database",
    query=query_drop_database,
    database=athena_database,
    output_location=f"s3://{s3_bucket}/",
    trigger_rule=TriggerRule.ALL_DONE,
    sleep_time=1,
)

delete_s3_bucket = S3DeleteBucketOperator(
    task_id="delete_s3_bucket",
    bucket_name=s3_bucket,
    force_delete=True,
    trigger_rule=TriggerRule.ALL_DONE,
)

chain(
    # TEST SETUP
    test_context,
    create_s3_bucket,
    await_bucket(s3_bucket),
    upload_sample_data,
    create_database,
    # TEST BODY
    create_table,
    read_table,
    await_query,
    read_results_from_s3(s3_bucket, read_table.output),
    # TEST TEARDOWN
    drop_table,
    drop_database,
    delete_s3_bucket,
)

from tests_common.test_utils.watcher import watcher

# This test needs watcher in order to properly mark success/failure
# when "tearDown" task with trigger rule is part of the DAG
list(dag.tasks) >> watcher()

from tests_common.test_utils.system_tests import get_test_run  # noqa: E402

# Needed to run the example DAG with pytest (see: tests/system/README.md#run_via_pytest)