Source code for tests.system.providers.google.cloud.dataflow.example_dataflow_pipeline
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Example Airflow DAG for testing Google Dataflow to create pipelines."""
from __future__ import annotations

import os
from datetime import datetime

from airflow.models.dag import DAG
from airflow.providers.google.cloud.operators.dataflow import (
    DataflowCreatePipelineOperator,
    DataflowDeletePipelineOperator,
    DataflowRunPipelineOperator,
)
from airflow.providers.google.cloud.operators.gcs import (
    GCSCreateBucketOperator,
    GCSDeleteBucketOperator,
    GCSSynchronizeBucketsOperator,
)
from airflow.utils.trigger_rule import TriggerRule
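
# NOTE: the constant definitions and the DAG declaration between the imports
# and the first task were missing from this page. The block below is a
# reconstruction inferred from how the names are used further down and from
# the conventions of other Google provider system tests; the literal values
# (env var names, bucket names, region, GCS paths) are assumptions, not the
# verified original.
ENV_ID = os.environ.get("SYSTEM_TESTS_ENV_ID", "default")  # assumed env var convention
GCP_PROJECT_ID = os.environ.get("SYSTEM_TESTS_GCP_PROJECT", "default")  # assumed
DAG_ID = "dataflow_pipeline"  # assumed

GCP_LOCATION = "us-central1"  # assumed region
BUCKET_NAME = f"bucket_{DAG_ID}_{ENV_ID}"  # assumed naming scheme
RESOURCE_DATA_BUCKET = "airflow-system-tests-resources"  # assumed shared resources bucket

# Pipeline names disallow underscores, hence the replace(); names assumed.
PIPELINE_NAME = f"{DAG_ID}-{ENV_ID}".replace("_", "-")
PIPELINE_JOB_NAME = f"{DAG_ID}-job-{ENV_ID}".replace("_", "-")
PIPELINE_TYPE = "PIPELINE_TYPE_BATCH"  # assumed batch pipeline

# Paths under BUCKET_NAME/dataflow match the GCSSynchronizeBucketsOperator
# destination below; the file names themselves are assumptions.
GCS_PATH = f"gs://{BUCKET_NAME}/dataflow/word-count.json"  # assumed Flex Template spec
INPUT_FILE = f"gs://{BUCKET_NAME}/dataflow/kinglear.txt"  # assumed input file
OUTPUT = f"gs://{BUCKET_NAME}/results/hello"  # assumed output prefix
TEMP_LOCATION = f"gs://{BUCKET_NAME}/temp"  # assumed temp location

with DAG(
    DAG_ID,
    schedule="@once",  # run the whole setup/body/teardown chain once per test invocation
    start_date=datetime(2021, 1, 1),
    catchup=False,
    tags=["example", "dataflow"],
) as dag:
    create_bucket = GCSCreateBucketOperator(task_id="create_bucket", bucket_name=BUCKET_NAME)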
    move_files_to_bucket = GCSSynchronizeBucketsOperator(
        task_id="move_files_to_bucket",
        source_bucket=RESOURCE_DATA_BUCKET,
        source_object="dataflow/pipelines",
        destination_bucket=BUCKET_NAME,
        destination_object="dataflow",
        recursive=True,
    )

    # [START howto_operator_create_dataflow_pipeline]
    create_pipeline = DataflowCreatePipelineOperator(
        task_id="create_pipeline",
        project_id=GCP_PROJECT_ID,
        location=GCP_LOCATION,
        body={
            "name": f"projects/{GCP_PROJECT_ID}/locations/{GCP_LOCATION}/pipelines/{PIPELINE_NAME}",
            "type": PIPELINE_TYPE,
            "workload": {
                "dataflowFlexTemplateRequest": {
                    "launchParameter": {
                        "containerSpecGcsPath": GCS_PATH,
                        "jobName": PIPELINE_JOB_NAME,
                        "environment": {"tempLocation": TEMP_LOCATION},
                        "parameters": {
                            "inputFile": INPUT_FILE,
                            "output": OUTPUT,
                        },
                    },
                    "projectId": GCP_PROJECT_ID,
                    "location": GCP_LOCATION,
                }
            },
        },
    )
    # [END howto_operator_create_dataflow_pipeline]

    # [START howto_operator_run_dataflow_pipeline]
    run_pipeline = DataflowRunPipelineOperator(
        task_id="run_pipeline",
        pipeline_name=PIPELINE_NAME,
        project_id=GCP_PROJECT_ID,
    )
    # [END howto_operator_run_dataflow_pipeline]

    # [START howto_operator_delete_dataflow_pipeline]
    delete_pipeline = DataflowDeletePipelineOperator(
        task_id="delete_pipeline",
        pipeline_name=PIPELINE_NAME,
        project_id=GCP_PROJECT_ID,
        trigger_rule=TriggerRule.ALL_DONE,
    )
    # [END howto_operator_delete_dataflow_pipeline]

    delete_bucket = GCSDeleteBucketOperator(
        task_id="delete_bucket", bucket_name=BUCKET_NAME, trigger_rule=TriggerRule.ALL_DONE
    )

    (
        # TEST SETUP
        create_bucket
        >> move_files_to_bucket
        # TEST BODY
        >> create_pipeline
        >> run_pipeline
        # TEST TEARDOWN
        >> delete_pipeline
        >> delete_bucket
    )

    from tests.system.utils.watcher import watcher

    # This test needs watcher in order to properly mark success/failure
    # when "teardown" task with trigger rule is part of the DAG
    list(dag.tasks) >> watcher()

from tests.system.utils import get_test_run  # noqa: E402

# Needed to run the example DAG with pytest (see: tests/system/README.md#run_via_pytest)
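# The final assignment was truncated on this page; Airflow system tests
# conventionally end by exposing the DAG to pytest via get_test_run, so the
# line below is restored on that assumption.
test_run = get_test_run(dag)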