# Source code for tests.system.providers.amazon.aws.example_glue_data_quality
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import annotations

from datetime import datetime

from airflow import DAG
from airflow.decorators import task, task_group
from airflow.models.baseoperator import chain
from airflow.providers.amazon.aws.hooks.glue import GlueDataQualityHook
from airflow.providers.amazon.aws.operators.athena import AthenaOperator
from airflow.providers.amazon.aws.operators.glue import (
    GlueDataQualityOperator,
    GlueDataQualityRuleSetEvaluationRunOperator,
)
from airflow.providers.amazon.aws.operators.s3 import (
    S3CreateBucketOperator,
    S3CreateObjectOperator,
    S3DeleteBucketOperator,
)
from airflow.providers.amazon.aws.sensors.glue import GlueDataQualityRuleSetEvaluationRunSensor
from airflow.utils.trigger_rule import TriggerRule
from tests.system.providers.amazon.aws.utils import SystemTestContextBuilder
# Data Quality Definition Language (DQDL) ruleset evaluated against the sample
# table's `name` and `age` columns. The pieces are joined by implicit string
# concatenation purely to keep lines short; the resulting value is a single
# line of text.
RULE_SET = (
    "Rules = [ "
    "RowCount between 2 and 8, "
    'IsComplete "name", '
    'Uniqueness "name" > 0.95, '
    'ColumnLength "name" between 3 and 14, '
    'ColumnValues "age" between 19 and 31'
    "]"
)
# NOTE(review): `test_context`, `dag`, `SAMPLE_DATA`, `SAMPLE_FILENAME`,
# `glue_data_quality_workflow` and `delete_ruleset` are defined outside the
# visible part of this file. These statements presumably belong inside a
# `with DAG(...) as dag:` block -- confirm and indent accordingly when splicing.

# Resource names are all derived from the per-run environment id.
env_id = test_context["ENV_ID"]

rule_set_name = f"{env_id}-system-test-ruleset"
s3_bucket = f"{env_id}-glue-dq-athena-bucket"
athena_table = f"{env_id}_test_glue_dq_table"
athena_database = f"{env_id}_glue_dq_default"

# Athena statements used by the setup and teardown tasks below.
query_create_database = f"CREATE DATABASE IF NOT EXISTS {athena_database}"
# NOTE(review): LOCATION contains a double slash while the sample object is
# uploaded under "{athena_table}/..."; kept exactly as found -- confirm it is
# intentional.
query_create_table = (
    f"CREATE EXTERNAL TABLE IF NOT EXISTS {athena_database}.{athena_table} "
    "( `name` string, `age` int ) "
    'ROW FORMAT SERDE "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe" '
    'WITH SERDEPROPERTIES ( "serialization.format" = ",", "field.delim" = "," ) '
    f'LOCATION "s3://{s3_bucket}//{athena_table}" '
    'TBLPROPERTIES ("has_encrypted_data"="false") '
)
query_read_table = f"SELECT * from {athena_database}.{athena_table}"
query_drop_table = f"DROP TABLE IF EXISTS {athena_database}.{athena_table}"
query_drop_database = f"DROP DATABASE IF EXISTS {athena_database}"

# --- Setup tasks: bucket, sample data, Athena database and table ---
create_s3_bucket = S3CreateBucketOperator(task_id="create_s3_bucket", bucket_name=s3_bucket)

upload_sample_data = S3CreateObjectOperator(
    task_id="upload_sample_data",
    s3_bucket=s3_bucket,
    s3_key=f"{athena_table}/{SAMPLE_FILENAME}",
    data=SAMPLE_DATA,
    replace=True,
)

create_database = AthenaOperator(
    task_id="create_database",
    query=query_create_database,
    database=athena_database,
    output_location=f"s3://{s3_bucket}/",
    sleep_time=1,
)

create_table = AthenaOperator(
    task_id="create_table",
    query=query_create_table,
    database=athena_database,
    output_location=f"s3://{s3_bucket}/",
    sleep_time=1,
)

# --- Teardown tasks: ALL_DONE so cleanup runs even if upstream tasks fail ---
drop_table = AthenaOperator(
    task_id="drop_table",
    query=query_drop_table,
    database=athena_database,
    output_location=f"s3://{s3_bucket}/",
    trigger_rule=TriggerRule.ALL_DONE,
    sleep_time=1,
)

drop_database = AthenaOperator(
    task_id="drop_database",
    query=query_drop_database,
    database=athena_database,
    output_location=f"s3://{s3_bucket}/",
    trigger_rule=TriggerRule.ALL_DONE,
    sleep_time=1,
)

delete_s3_bucket = S3DeleteBucketOperator(
    task_id="delete_s3_bucket",
    bucket_name=s3_bucket,
    force_delete=True,
    trigger_rule=TriggerRule.ALL_DONE,
)

chain(
    # TEST SETUP
    test_context,
    create_s3_bucket,
    upload_sample_data,
    create_database,
    create_table,
    # TEST BODY
    glue_data_quality_workflow(),
    # TEST TEARDOWN
    delete_ruleset(rule_set_name),
    drop_table,
    drop_database,
    delete_s3_bucket,
)

from tests.system.utils.watcher import watcher

# This test needs watcher in order to properly mark success/failure
# when "tearDown" task with trigger rule is part of the DAG
list(dag.tasks) >> watcher()

from tests.system.utils import get_test_run  # noqa: E402

# Needed to run the example DAG with pytest (see: tests/system/README.md#run_via_pytest)