Source code for tests.system.providers.google.cloud.dataplex.example_dataplex_dp
# Licensed to the Apache Software Foundation (ASF) under one# or more contributor license agreements. See the NOTICE file# distributed with this work for additional information# regarding copyright ownership. The ASF licenses this file# to you under the Apache License, Version 2.0 (the# "License"); you may not use this file except in compliance# with the License. You may obtain a copy of the License at## http://www.apache.org/licenses/LICENSE-2.0## Unless required by applicable law or agreed to in writing,# software distributed under the License is distributed on an# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY# KIND, either express or implied. See the License for the# specific language governing permissions and limitations# under the License."""Example Airflow DAG that shows how to use Dataplex Scan Data."""from__future__importannotationsimportosfromdatetimeimportdatetimefromgoogle.cloudimportdataplex_v1fromgoogle.cloud.dataplex_v1importDataProfileSpecfromgoogle.protobuf.field_mask_pb2importFieldMaskfromairflow.models.baseoperatorimportchainfromairflow.models.dagimportDAGfromairflow.providers.google.cloud.operators.bigqueryimport(BigQueryCreateEmptyDatasetOperator,BigQueryCreateEmptyTableOperator,BigQueryDeleteDatasetOperator,BigQueryInsertJobOperator,)fromairflow.providers.google.cloud.operators.datapleximport(DataplexCreateAssetOperator,DataplexCreateLakeOperator,DataplexCreateOrUpdateDataProfileScanOperator,DataplexCreateZoneOperator,DataplexDeleteAssetOperator,DataplexDeleteDataProfileScanOperator,DataplexDeleteLakeOperator,DataplexDeleteZoneOperator,DataplexGetDataProfileScanOperator,DataplexGetDataProfileScanResultOperator,DataplexRunDataProfileScanOperator,)fromairflow.providers.google.cloud.sensors.datapleximportDataplexDataProfileJobStatusSensorfromairflow.utils.trigger_ruleimportTriggerRule
# Task definitions and wiring for the Dataplex data-profile example.
#
# NOTE(review): this section relies on names that are defined outside the
# visible excerpt (DATASET, TABLE_1, TABLE_2, SCHEMA, LOCATION,
# INSERT_ROWS_QUERY, PROJECT_ID, REGION, LAKE_ID, ZONE_ID, ASSET_ID,
# DATA_SCAN_ID, UPDATE_MASK, the EXAMPLE_* bodies, `create_dataset` and `dag`).
# Presumably these statements live inside the DAG context that defines `dag`;
# confirm the indentation level when splicing this back into the full file.
#
# The `# [START ...]` / `# [END ...]` markers must each stay on their own
# comment line: a marker sharing a line with code turns that code into a comment.

# BigQuery fixtures: two empty tables with the same schema, then seed rows.
create_table_1 = BigQueryCreateEmptyTableOperator(
    task_id="create_table_1",
    dataset_id=DATASET,
    table_id=TABLE_1,
    schema_fields=SCHEMA,
    location=LOCATION,
)

create_table_2 = BigQueryCreateEmptyTableOperator(
    task_id="create_table_2",
    dataset_id=DATASET,
    table_id=TABLE_2,
    schema_fields=SCHEMA,
    location=LOCATION,
)

insert_query_job = BigQueryInsertJobOperator(
    task_id="insert_query_job",
    configuration={
        "query": {
            "query": INSERT_ROWS_QUERY,
            "useLegacySql": False,
        }
    },
)

# Dataplex fixtures: lake -> zone -> asset.
create_lake = DataplexCreateLakeOperator(
    task_id="create_lake",
    project_id=PROJECT_ID,
    region=REGION,
    body=EXAMPLE_LAKE_BODY,
    lake_id=LAKE_ID,
)

# [START howto_dataplex_create_zone_operator]
create_zone = DataplexCreateZoneOperator(
    task_id="create_zone",
    project_id=PROJECT_ID,
    region=REGION,
    lake_id=LAKE_ID,
    body=EXAMPLE_ZONE,
    zone_id=ZONE_ID,
)
# [END howto_dataplex_create_zone_operator]

# [START howto_dataplex_create_asset_operator]
create_asset = DataplexCreateAssetOperator(
    task_id="create_asset",
    project_id=PROJECT_ID,
    region=REGION,
    body=EXAMPLE_ASSET,
    lake_id=LAKE_ID,
    zone_id=ZONE_ID,
    asset_id=ASSET_ID,
)
# [END howto_dataplex_create_asset_operator]

# [START howto_dataplex_create_data_profile_operator]
create_data_scan = DataplexCreateOrUpdateDataProfileScanOperator(
    task_id="create_data_scan",
    project_id=PROJECT_ID,
    region=REGION,
    body=EXAMPLE_DATA_SCAN,
    data_scan_id=DATA_SCAN_ID,
)
# [END howto_dataplex_create_data_profile_operator]

# Same operator and scan id as above, this time with an update mask and an
# update body.
update_data_scan = DataplexCreateOrUpdateDataProfileScanOperator(
    task_id="update_data_scan",
    project_id=PROJECT_ID,
    region=REGION,
    update_mask=UPDATE_MASK,
    body=EXAMPLE_DATA_SCAN_UPDATE,
    data_scan_id=DATA_SCAN_ID,
)

# [START howto_dataplex_get_data_profile_operator]
get_data_scan = DataplexGetDataProfileScanOperator(
    task_id="get_data_scan",
    project_id=PROJECT_ID,
    region=REGION,
    data_scan_id=DATA_SCAN_ID,
)
# [END howto_dataplex_get_data_profile_operator]

# No `asynchronous` / `deferrable` flag is passed here, so the operator runs in
# whatever its default mode is.
run_data_scan_sync = DataplexRunDataProfileScanOperator(
    task_id="run_data_scan_sync",
    project_id=PROJECT_ID,
    region=REGION,
    data_scan_id=DATA_SCAN_ID,
)

get_data_scan_job_result = DataplexGetDataProfileScanResultOperator(
    task_id="get_data_scan_job_result",
    project_id=PROJECT_ID,
    region=REGION,
    data_scan_id=DATA_SCAN_ID,
)

# [START howto_dataplex_run_data_profile_operator]
run_data_scan_async = DataplexRunDataProfileScanOperator(
    task_id="run_data_scan_async",
    project_id=PROJECT_ID,
    region=REGION,
    data_scan_id=DATA_SCAN_ID,
    asynchronous=True,
)
# [END howto_dataplex_run_data_profile_operator]

# The sensor's job id is templated from the XCom value pushed by the
# `run_data_scan_async` task.
# [START howto_dataplex_data_scan_job_state_sensor]
get_data_scan_job_status = DataplexDataProfileJobStatusSensor(
    task_id="get_data_scan_job_status",
    project_id=PROJECT_ID,
    region=REGION,
    data_scan_id=DATA_SCAN_ID,
    job_id="{{ task_instance.xcom_pull('run_data_scan_async') }}",
)
# [END howto_dataplex_data_scan_job_state_sensor]

# [START howto_dataplex_get_data_profile_job_operator]
get_data_scan_job_result_2 = DataplexGetDataProfileScanResultOperator(
    task_id="get_data_scan_job_result_2",
    project_id=PROJECT_ID,
    region=REGION,
    data_scan_id=DATA_SCAN_ID,
)
# [END howto_dataplex_get_data_profile_job_operator]

# [START howto_dataplex_run_data_profile_def_operator]
run_data_scan_def = DataplexRunDataProfileScanOperator(
    task_id="run_data_scan_def",
    project_id=PROJECT_ID,
    region=REGION,
    data_scan_id=DATA_SCAN_ID,
    deferrable=True,
)
# [END howto_dataplex_run_data_profile_def_operator]

run_data_scan_async_2 = DataplexRunDataProfileScanOperator(
    task_id="run_data_scan_async_2",
    project_id=PROJECT_ID,
    region=REGION,
    data_scan_id=DATA_SCAN_ID,
    asynchronous=True,
)

# Teardown tasks all use TriggerRule.ALL_DONE.
# [START howto_dataplex_delete_asset_operator]
delete_asset = DataplexDeleteAssetOperator(
    task_id="delete_asset",
    project_id=PROJECT_ID,
    region=REGION,
    lake_id=LAKE_ID,
    zone_id=ZONE_ID,
    asset_id=ASSET_ID,
    trigger_rule=TriggerRule.ALL_DONE,
)
# [END howto_dataplex_delete_asset_operator]

# [START howto_dataplex_delete_zone_operator]
delete_zone = DataplexDeleteZoneOperator(
    task_id="delete_zone",
    project_id=PROJECT_ID,
    region=REGION,
    lake_id=LAKE_ID,
    zone_id=ZONE_ID,
    trigger_rule=TriggerRule.ALL_DONE,
)
# [END howto_dataplex_delete_zone_operator]

# [START howto_dataplex_delete_data_profile_operator]
delete_data_scan = DataplexDeleteDataProfileScanOperator(
    task_id="delete_data_scan",
    project_id=PROJECT_ID,
    region=REGION,
    data_scan_id=DATA_SCAN_ID,
    trigger_rule=TriggerRule.ALL_DONE,
)
# [END howto_dataplex_delete_data_profile_operator]

delete_lake = DataplexDeleteLakeOperator(
    project_id=PROJECT_ID,
    region=REGION,
    lake_id=LAKE_ID,
    task_id="delete_lake",
    trigger_rule=TriggerRule.ALL_DONE,
)

delete_dataset = BigQueryDeleteDatasetOperator(
    task_id="delete_dataset",
    dataset_id=DATASET,
    project_id=PROJECT_ID,
    delete_contents=True,
    trigger_rule=TriggerRule.ALL_DONE,
)

# Wire the tasks in order; the list entries are the two table-creation tasks
# and the two final cleanup tasks.
chain(
    # TEST SETUP
    create_dataset,
    [create_table_1, create_table_2],
    insert_query_job,
    create_lake,
    create_zone,
    create_asset,
    # TEST BODY
    create_data_scan,
    update_data_scan,
    get_data_scan,
    run_data_scan_sync,
    get_data_scan_job_result,
    run_data_scan_async,
    get_data_scan_job_status,
    get_data_scan_job_result_2,
    run_data_scan_def,
    run_data_scan_async_2,
    # TEST TEARDOWN
    delete_asset,
    delete_zone,
    delete_data_scan,
    [delete_lake, delete_dataset],
)

from tests.system.utils.watcher import watcher

# This test needs watcher in order to properly mark success/failure
# when "tearDown" task with trigger rule is part of the DAG
list(dag.tasks) >> watcher()

from tests.system.utils import get_test_run  # noqa: E402

# Needed to run the example DAG with pytest (see: tests/system/README.md#run_via_pytest)