Source code for tests.system.providers.amazon.aws.example_glue_databrew
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import annotations

import boto3
import pendulum

from airflow.decorators import task
from airflow.models.baseoperator import chain
from airflow.models.dag import DAG
from airflow.providers.amazon.aws.operators.glue_databrew import (
    GlueDataBrewStartJobOperator,
)
from airflow.providers.amazon.aws.operators.s3 import (
    S3CreateBucketOperator,
    S3CreateObjectOperator,
    S3DeleteBucketOperator,
)
from airflow.utils.trigger_rule import TriggerRule
from tests.system.providers.amazon.aws.utils import SystemTestContextBuilder
# Body of the Glue DataBrew example DAG: create two S3 buckets, upload a
# sample JSON file, build a DataBrew dataset and job, start the job, then
# tear everything down.
#
# NOTE(review): `test_context`, `dag`, `ROLE_ARN_KEY`, `EXAMPLE_JSON` and the
# `create_dataset` / `create_job` / `delete_job` / `delete_dataset` callables
# are defined outside this excerpt. These statements presumably belong inside
# the `with DAG(...) as dag:` block -- confirm and indent accordingly.

# Per-run identifiers taken from the system-test context.
env_id = test_context["ENV_ID"]
role_arn = test_context[ROLE_ARN_KEY]

# Resource names are prefixed with the environment id.
bucket_name = f"{env_id}-bucket-databrew"
output_bucket_name = f"{env_id}-output-bucket-databrew"
file_name = "data.json"
dataset_name = f"{env_id}-dataset"
job_name = f"{env_id}-databrew-job"

create_bucket = S3CreateBucketOperator(
    task_id="create_bucket",
    bucket_name=bucket_name,
)

create_output_bucket = S3CreateBucketOperator(
    task_id="create_output_bucket",
    bucket_name=output_bucket_name,
)

upload_file = S3CreateObjectOperator(
    task_id="upload_file",
    s3_bucket=bucket_name,
    s3_key=file_name,
    data=EXAMPLE_JSON,
    replace=True,
)

# [START howto_operator_glue_databrew_start]
start_job = GlueDataBrewStartJobOperator(task_id="startjob", job_name=job_name, delay=15)
# [END howto_operator_glue_databrew_start]

# Teardown tasks use ALL_DONE so the buckets are removed even when an
# upstream task fails.
delete_bucket = S3DeleteBucketOperator(
    task_id="delete_bucket",
    trigger_rule=TriggerRule.ALL_DONE,
    bucket_name=bucket_name,
    force_delete=True,
)

delete_output_bucket = S3DeleteBucketOperator(
    task_id="delete_output_bucket",
    trigger_rule=TriggerRule.ALL_DONE,
    bucket_name=output_bucket_name,
    force_delete=True,
)

chain(
    # TEST SETUP
    test_context,
    create_bucket,
    create_output_bucket,
    upload_file,
    create_dataset(dataset_name, bucket_name, file_name),
    create_job(dataset_name, job_name, output_bucket_name, "output.json", role_arn),
    # TEST BODY
    start_job,
    # TEST TEARDOWN
    delete_job(job_name),
    delete_dataset(dataset_name),
    delete_bucket,
    delete_output_bucket,
)

from tests.system.utils.watcher import watcher

# This test needs watcher in order to properly mark success/failure
# when "tearDown" task with trigger rule is part of the DAG
list(dag.tasks) >> watcher()

from tests.system.utils import get_test_run  # noqa: E402

# Needed to run the example DAG with pytest (see: tests/system/README.md#run_via_pytest)