tests.system.google.cloud.dataproc_metastore.example_dataproc_metastore_hive_partition_sensor

Example Airflow DAG that shows how to check for the existence of Hive partitions with the Dataproc Metastore sensor.

Note that the Metastore service must be configured to use gRPC endpoints.

Attributes

DAG_ID

PROJECT_ID

ENV_ID

REGION

NETWORK

METASTORE_SERVICE_ID

METASTORE_TIMEOUT

METASTORE_SERVICE

METASTORE_SERVICE_QFN

DATAPROC_CLUSTER_NAME

DATAPROC_CLUSTER_CONFIG

TABLE_NAME

COLUMN

PARTITION_1

PARTITION_2

SOURCE_DATA_BUCKET

SOURCE_DATA_PATH

SOURCE_DATA_FILE_NAME

EXTERNAL_TABLE_BUCKET

QUERY_CREATE_EXTERNAL_TABLE

QUERY_CREATE_PARTITIONED_TABLE

QUERY_COPY_DATA_WITH_PARTITIONS

create_metastore_service

test_run

Module Contents

tests.system.google.cloud.dataproc_metastore.example_dataproc_metastore_hive_partition_sensor.DAG_ID = 'hive_partition_sensor'[source]
tests.system.google.cloud.dataproc_metastore.example_dataproc_metastore_hive_partition_sensor.PROJECT_ID[source]
tests.system.google.cloud.dataproc_metastore.example_dataproc_metastore_hive_partition_sensor.ENV_ID[source]
tests.system.google.cloud.dataproc_metastore.example_dataproc_metastore_hive_partition_sensor.REGION = 'europe-west1'[source]
tests.system.google.cloud.dataproc_metastore.example_dataproc_metastore_hive_partition_sensor.NETWORK = 'default'[source]
tests.system.google.cloud.dataproc_metastore.example_dataproc_metastore_hive_partition_sensor.METASTORE_SERVICE_ID[source]
tests.system.google.cloud.dataproc_metastore.example_dataproc_metastore_hive_partition_sensor.METASTORE_TIMEOUT = 2400[source]
tests.system.google.cloud.dataproc_metastore.example_dataproc_metastore_hive_partition_sensor.METASTORE_SERVICE[source]
tests.system.google.cloud.dataproc_metastore.example_dataproc_metastore_hive_partition_sensor.METASTORE_SERVICE_QFN[source]
tests.system.google.cloud.dataproc_metastore.example_dataproc_metastore_hive_partition_sensor.DATAPROC_CLUSTER_NAME[source]
tests.system.google.cloud.dataproc_metastore.example_dataproc_metastore_hive_partition_sensor.DATAPROC_CLUSTER_CONFIG[source]
tests.system.google.cloud.dataproc_metastore.example_dataproc_metastore_hive_partition_sensor.TABLE_NAME = 'transactions_partitioned'[source]
tests.system.google.cloud.dataproc_metastore.example_dataproc_metastore_hive_partition_sensor.COLUMN = 'TransactionType'[source]
tests.system.google.cloud.dataproc_metastore.example_dataproc_metastore_hive_partition_sensor.PARTITION_1 = ''[source]
tests.system.google.cloud.dataproc_metastore.example_dataproc_metastore_hive_partition_sensor.PARTITION_2 = ''[source]
tests.system.google.cloud.dataproc_metastore.example_dataproc_metastore_hive_partition_sensor.SOURCE_DATA_BUCKET = 'airflow-system-tests-resources'[source]
tests.system.google.cloud.dataproc_metastore.example_dataproc_metastore_hive_partition_sensor.SOURCE_DATA_PATH = 'dataproc/hive'[source]
tests.system.google.cloud.dataproc_metastore.example_dataproc_metastore_hive_partition_sensor.SOURCE_DATA_FILE_NAME = 'part-00000.parquet'[source]
tests.system.google.cloud.dataproc_metastore.example_dataproc_metastore_hive_partition_sensor.EXTERNAL_TABLE_BUCKET = "{{task_instance.xcom_pull(task_ids='get_hive_warehouse_bucket_task', key='bucket')}}"[source]
tests.system.google.cloud.dataproc_metastore.example_dataproc_metastore_hive_partition_sensor.QUERY_CREATE_EXTERNAL_TABLE = Multiline-String[source]
Show Value
"""
CREATE EXTERNAL TABLE IF NOT EXISTS transactions
(SubmissionDate DATE, TransactionAmount DOUBLE, TransactionType STRING)
STORED AS PARQUET
LOCATION 'gs://{{task_instance.xcom_pull(task_ids='get_hive_warehouse_bucket_task', key='bucket')}}/dataproc/hive';
"""
tests.system.google.cloud.dataproc_metastore.example_dataproc_metastore_hive_partition_sensor.QUERY_CREATE_PARTITIONED_TABLE = Multiline-String[source]
Show Value
"""
CREATE EXTERNAL TABLE IF NOT EXISTS transactions_partitioned
(SubmissionDate DATE, TransactionAmount DOUBLE)
PARTITIONED BY (TransactionType STRING);
"""
tests.system.google.cloud.dataproc_metastore.example_dataproc_metastore_hive_partition_sensor.QUERY_COPY_DATA_WITH_PARTITIONS = Multiline-String[source]
Show Value
"""
SET hive.exec.dynamic.partition.mode=nonstrict;
INSERT INTO TABLE transactions_partitioned PARTITION (TransactionType)
SELECT SubmissionDate,TransactionAmount,TransactionType FROM transactions;
"""
tests.system.google.cloud.dataproc_metastore.example_dataproc_metastore_hive_partition_sensor.create_metastore_service[source]
tests.system.google.cloud.dataproc_metastore.example_dataproc_metastore_hive_partition_sensor.test_run[source]

Was this entry helpful?