Source code for tests.system.providers.google.cloud.cloud_sql.example_cloud_sql_query

#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
"""
Example Airflow DAG that performs query in a Cloud SQL instance.
"""

from __future__ import annotations

import logging
import os
from collections import namedtuple
from copy import deepcopy
from datetime import datetime
from pathlib import Path
from typing import Any, Iterable

from googleapiclient import discovery

from airflow import settings
from airflow.decorators import task, task_group
from airflow.models.connection import Connection
from airflow.models.dag import DAG
from airflow.operators.bash import BashOperator
from airflow.providers.google.cloud.operators.cloud_sql import (
    CloudSQLCreateInstanceDatabaseOperator,
    CloudSQLCreateInstanceOperator,
    CloudSQLDeleteInstanceOperator,
    CloudSQLExecuteQueryOperator,
)
from airflow.utils.trigger_rule import TriggerRule

[docs]ENV_ID = os.environ.get("SYSTEM_TESTS_ENV_ID")
[docs]PROJECT_ID = os.environ.get("SYSTEM_TESTS_GCP_PROJECT")
[docs]DAG_ID = "cloudsql-query"
[docs]REGION = "us-central1"
[docs]HOME_DIR = Path.home()
[docs]COMPOSER_ENVIRONMENT = os.environ.get("COMPOSER_ENVIRONMENT", "")
if COMPOSER_ENVIRONMENT: # We assume that the test is launched in Cloud Composer environment because the reserved environment # variable is assigned (https://cloud.google.com/composer/docs/composer-2/set-environment-variables)
[docs] GET_COMPOSER_NETWORK_COMMAND = """ gcloud composer environments describe $COMPOSER_ENVIRONMENT \ --location=$COMPOSER_LOCATION \ --project=$GCP_PROJECT \ --format="value(config.nodeConfig.network)" """
else: # The test is launched locally GET_COMPOSER_NETWORK_COMMAND = "echo"
[docs]def run_in_composer(): return bool(COMPOSER_ENVIRONMENT)
[docs]CLOUD_SQL_INSTANCE_NAME_TEMPLATE = f"{ENV_ID}-{DAG_ID}".replace("_", "-")
[docs]CLOUD_SQL_INSTANCE_CREATE_BODY_TEMPLATE: dict[str, Any] = { "name": CLOUD_SQL_INSTANCE_NAME_TEMPLATE, "settings": { "tier": "db-custom-1-3840", "dataDiskSizeGb": 30, "pricingPlan": "PER_USE", "ipConfiguration": {}, }, # For using a different database version please check the link below. # https://cloud.google.com/sql/docs/mysql/admin-api/rest/v1/SqlDatabaseVersion "databaseVersion": "1.2.3", "region": REGION, "ipConfiguration": { "ipv4Enabled": True, "requireSsl": False, "authorizedNetworks": [ {"value": "0.0.0.0/0"}, ], }, }
[docs]DB_PROVIDERS: Iterable[dict[str, str]] = ( { "database_type": "postgres", "port": "5432", "database_version": "POSTGRES_15", "cloud_sql_instance_name": f"{CLOUD_SQL_INSTANCE_NAME_TEMPLATE}-postgres", }, { "database_type": "mysql", "port": "3306", "database_version": "MYSQL_8_0", "cloud_sql_instance_name": f"{CLOUD_SQL_INSTANCE_NAME_TEMPLATE}-mysql", }, )
[docs]def ip_configuration() -> dict[str, Any]: """Generates an ip configuration for a CloudSQL instance creation body""" if run_in_composer(): # Use connection to Cloud SQL instance via Private IP within the Cloud Composer's network. return { "ipv4Enabled": True, "requireSsl": False, "enablePrivatePathForGoogleCloudServices": True, "privateNetwork": """{{ task_instance.xcom_pull('get_composer_network')}}""", } else: # Use connection to Cloud SQL instance via Public IP from anywhere (mask 0.0.0.0/0). # Consider specifying your network mask # for allowing requests only from the trusted sources, not from anywhere. return { "ipv4Enabled": True, "requireSsl": False, "authorizedNetworks": [ {"value": "0.0.0.0/0"}, ], }
[docs]def cloud_sql_instance_create_body(database_provider: dict[str, Any]) -> dict[str, Any]: """Generates a CloudSQL instance creation body""" create_body: dict[str, Any] = deepcopy(CLOUD_SQL_INSTANCE_CREATE_BODY_TEMPLATE) create_body["name"] = database_provider["cloud_sql_instance_name"] create_body["databaseVersion"] = database_provider["database_version"] create_body["settings"]["ipConfiguration"] = ip_configuration() return create_body
[docs]CLOUD_SQL_DATABASE_NAME = "test_db"
[docs]CLOUD_SQL_USER = "test_user"
[docs]CLOUD_SQL_PASSWORD = "JoxHlwrPzwch0gz9"
[docs]CLOUD_SQL_IP_ADDRESS = "127.0.0.1"
[docs]CLOUD_SQL_PUBLIC_PORT = 5432
[docs]def cloud_sql_database_create_body(instance: str) -> dict[str, Any]: """Generates a CloudSQL database creation body""" return { "instance": instance, "name": CLOUD_SQL_DATABASE_NAME, "project": PROJECT_ID, }
[docs]CLOUD_SQL_INSTANCE_NAME = ""
[docs]DATABASE_TYPE = "" # "postgres|mysql|mssql"
# [START howto_operator_cloudsql_query_connections] # Connect via proxy over TCP
[docs]CONNECTION_PROXY_TCP_KWARGS = { "conn_type": "gcpcloudsql", "login": CLOUD_SQL_USER, "password": CLOUD_SQL_PASSWORD, "host": CLOUD_SQL_IP_ADDRESS, "port": CLOUD_SQL_PUBLIC_PORT, "schema": CLOUD_SQL_DATABASE_NAME, "extra": { "database_type": DATABASE_TYPE, "project_id": PROJECT_ID, "location": REGION, "instance": CLOUD_SQL_INSTANCE_NAME, "use_proxy": "True", "sql_proxy_use_tcp": "True", }, }
# Connect via proxy over UNIX socket (specific proxy version)
[docs]CONNECTION_PROXY_SOCKET_KWARGS = { "conn_type": "gcpcloudsql", "login": CLOUD_SQL_USER, "password": CLOUD_SQL_PASSWORD, "host": CLOUD_SQL_IP_ADDRESS, "port": CLOUD_SQL_PUBLIC_PORT, "schema": CLOUD_SQL_DATABASE_NAME, "extra": { "database_type": DATABASE_TYPE, "project_id": PROJECT_ID, "location": REGION, "instance": CLOUD_SQL_INSTANCE_NAME, "use_proxy": "True", "sql_proxy_version": "v1.33.9", "sql_proxy_use_tcp": "False", }, }
# Connect directly via TCP (non-SSL)
[docs]CONNECTION_PUBLIC_TCP_KWARGS = { "conn_type": "gcpcloudsql", "login": CLOUD_SQL_USER, "password": CLOUD_SQL_PASSWORD, "host": CLOUD_SQL_IP_ADDRESS, "port": CLOUD_SQL_PUBLIC_PORT, "schema": CLOUD_SQL_DATABASE_NAME, "extra": { "database_type": DATABASE_TYPE, "project_id": PROJECT_ID, "location": REGION, "instance": CLOUD_SQL_INSTANCE_NAME, "use_proxy": "False", "use_ssl": "False", }, }
# [END howto_operator_cloudsql_query_connections]
[docs]CONNECTION_PROXY_SOCKET_ID = f"{DAG_ID}_{ENV_ID}_proxy_socket"
[docs]CONNECTION_PROXY_TCP_ID = f"{DAG_ID}_{ENV_ID}_proxy_tcp"
[docs]CONNECTION_PUBLIC_TCP_ID = f"{DAG_ID}_{ENV_ID}_public_tcp"
[docs]ConnectionConfig = namedtuple("ConnectionConfig", "id kwargs")
[docs]CONNECTIONS = [ ConnectionConfig(id=CONNECTION_PROXY_SOCKET_ID, kwargs=CONNECTION_PROXY_SOCKET_KWARGS), ConnectionConfig(id=CONNECTION_PROXY_TCP_ID, kwargs=CONNECTION_PROXY_TCP_KWARGS), ConnectionConfig(id=CONNECTION_PUBLIC_TCP_ID, kwargs=CONNECTION_PUBLIC_TCP_KWARGS), ]
[docs]SQL = [ "CREATE TABLE IF NOT EXISTS TABLE_TEST (I INTEGER)", "CREATE TABLE IF NOT EXISTS TABLE_TEST (I INTEGER)", "INSERT INTO TABLE_TEST VALUES (0)", "CREATE TABLE IF NOT EXISTS TABLE_TEST2 (I INTEGER)", "DROP TABLE TABLE_TEST", "DROP TABLE TABLE_TEST2", ]
[docs]DELETE_CONNECTION_COMMAND = "airflow connections delete {}"
# [START howto_operator_cloudsql_query_connections_env] # The connections below are created using one of the standard approaches - via environment # variables named AIRFLOW_CONN_* . The connections can also be created in the database # of AIRFLOW (using command line or UI).
[docs]postgres_kwargs = { "user": "user", "password": "password", "public_ip": "public_ip", "public_port": "public_port", "database": "database", "project_id": "project_id", "location": "location", "instance": "instance", "client_cert_file": "client_cert_file", "client_key_file": "client_key_file", "server_ca_file": "server_ca_file", }
# Postgres: connect via proxy over TCP os.environ["AIRFLOW_CONN_PROXY_POSTGRES_TCP"] = ( "gcpcloudsql://{user}:{password}@{public_ip}:{public_port}/{database}?" "database_type=postgres&" "project_id={project_id}&" "location={location}&" "instance={instance}&" "use_proxy=True&" "sql_proxy_use_tcp=True".format(**postgres_kwargs) ) # Postgres: connect via proxy over UNIX socket (specific proxy version) os.environ["AIRFLOW_CONN_PROXY_POSTGRES_SOCKET"] = ( "gcpcloudsql://{user}:{password}@{public_ip}:{public_port}/{database}?" "database_type=postgres&" "project_id={project_id}&" "location={location}&" "instance={instance}&" "use_proxy=True&" "sql_proxy_version=v1.13&" "sql_proxy_use_tcp=False".format(**postgres_kwargs) ) # Postgres: connect directly via TCP (non-SSL) os.environ["AIRFLOW_CONN_PUBLIC_POSTGRES_TCP"] = ( "gcpcloudsql://{user}:{password}@{public_ip}:{public_port}/{database}?" "database_type=postgres&" "project_id={project_id}&" "location={location}&" "instance={instance}&" "use_proxy=False&" "use_ssl=False".format(**postgres_kwargs) ) # Postgres: connect directly via TCP (SSL) os.environ["AIRFLOW_CONN_PUBLIC_POSTGRES_TCP_SSL"] = ( "gcpcloudsql://{user}:{password}@{public_ip}:{public_port}/{database}?" "database_type=postgres&" "project_id={project_id}&" "location={location}&" "instance={instance}&" "use_proxy=False&" "use_ssl=True&" "sslcert={client_cert_file}&" "sslkey={client_key_file}&" "sslrootcert={server_ca_file}".format(**postgres_kwargs) )
[docs]mysql_kwargs = { "user": "user", "password": "password", "public_ip": "public_ip", "public_port": "public_port", "database": "database", "project_id": "project_id", "location": "location", "instance": "instance", "client_cert_file": "client_cert_file", "client_key_file": "client_key_file", "server_ca_file": "server_ca_file", }
# MySQL: connect via proxy over TCP (specific proxy version) os.environ["AIRFLOW_CONN_PROXY_MYSQL_TCP"] = ( "gcpcloudsql://{user}:{password}@{public_ip}:{public_port}/{database}?" "database_type=mysql&" "project_id={project_id}&" "location={location}&" "instance={instance}&" "use_proxy=True&" "sql_proxy_version=v1.13&" "sql_proxy_use_tcp=True".format(**mysql_kwargs) ) # MySQL: connect via proxy over UNIX socket using pre-downloaded Cloud Sql Proxy binary os.environ["AIRFLOW_CONN_PROXY_MYSQL_SOCKET"] = ( "gcpcloudsql://{user}:{password}@{public_ip}:{public_port}/{database}?" "database_type=mysql&" "project_id={project_id}&" "location={location}&" "instance={instance}&" "use_proxy=True&" "sql_proxy_use_tcp=False".format(**mysql_kwargs) ) # MySQL: connect directly via TCP (non-SSL) os.environ["AIRFLOW_CONN_PUBLIC_MYSQL_TCP"] = ( "gcpcloudsql://{user}:{password}@{public_ip}:{public_port}/{database}?" "database_type=mysql&" "project_id={project_id}&" "location={location}&" "instance={instance}&" "use_proxy=False&" "use_ssl=False".format(**mysql_kwargs) ) # MySQL: connect directly via TCP (SSL) and with fixed Cloud Sql Proxy binary path os.environ["AIRFLOW_CONN_PUBLIC_MYSQL_TCP_SSL"] = ( "gcpcloudsql://{user}:{password}@{public_ip}:{public_port}/{database}?" "database_type=mysql&" "project_id={project_id}&" "location={location}&" "instance={instance}&" "use_proxy=False&" "use_ssl=True&" "sslcert={client_cert_file}&" "sslkey={client_key_file}&" "sslrootcert={server_ca_file}".format(**mysql_kwargs) ) # Special case: MySQL: connect directly via TCP (SSL) and with fixed Cloud Sql # Proxy binary path AND with missing project_id os.environ["AIRFLOW_CONN_PUBLIC_MYSQL_TCP_SSL_NO_PROJECT_ID"] = ( "gcpcloudsql://{user}:{password}@{public_ip}:{public_port}/{database}?" "database_type=mysql&" "location={location}&" "instance={instance}&" "use_proxy=False&" "use_ssl=True&" "sslcert={client_cert_file}&" "sslkey={client_key_file}&" "sslrootcert={server_ca_file}".format(**mysql_kwargs) ) # [END howto_operator_cloudsql_query_connections_env]
[docs]log = logging.getLogger(__name__)
with DAG( dag_id=DAG_ID, start_date=datetime(2021, 1, 1), catchup=False, tags=["example", "cloudsql", "postgres"], ) as dag:
[docs] get_composer_network = BashOperator( task_id="get_composer_network", bash_command=GET_COMPOSER_NETWORK_COMMAND, do_xcom_push=True, )
for db_provider in DB_PROVIDERS: database_type: str = db_provider["database_type"] cloud_sql_instance_name: str = db_provider["cloud_sql_instance_name"] create_cloud_sql_instance = CloudSQLCreateInstanceOperator( task_id=f"create_cloud_sql_instance_{database_type}", project_id=PROJECT_ID, instance=cloud_sql_instance_name, body=cloud_sql_instance_create_body(database_provider=db_provider), ) create_database = CloudSQLCreateInstanceDatabaseOperator( task_id=f"create_database_{database_type}", body=cloud_sql_database_create_body(instance=cloud_sql_instance_name), instance=cloud_sql_instance_name, ) @task(task_id=f"create_user_{database_type}") def create_user(instance: str) -> None: with discovery.build("sqladmin", "v1beta4") as service: request = service.users().insert( project=PROJECT_ID, instance=instance, body={ "name": CLOUD_SQL_USER, "password": CLOUD_SQL_PASSWORD, }, ) request.execute() return None create_user_task = create_user(instance=cloud_sql_instance_name) @task(task_id=f"get_ip_address_{database_type}") def get_ip_address(instance: str) -> str | None: """Returns a Cloud SQL instance IP address. If the test is running in Cloud Composer, the Private IP address is used, otherwise Public IP.""" with discovery.build("sqladmin", "v1beta4") as service: request = service.connect().get( project=PROJECT_ID, instance=instance, fields="ipAddresses", ) response = request.execute() for ip_item in response.get("ipAddresses", []): if run_in_composer(): if ip_item["type"] == "PRIVATE": return ip_item["ipAddress"] else: if ip_item["type"] == "PRIMARY": return ip_item["ipAddress"] return None get_ip_address_task = get_ip_address(instance=cloud_sql_instance_name) @task(task_id=f"create_connection_{database_type}") def create_connection( connection_id: str, instance: str, db_type: str, ip_address: str, port: str, kwargs: dict[str, Any], ) -> str | None: session = settings.Session() if session.query(Connection).filter(Connection.conn_id == connection_id).first(): log.warning("Connection '%s' already exists", connection_id) return connection_id connection: dict[str, Any] = deepcopy(kwargs) connection["extra"]["instance"] = instance connection["host"] = ip_address connection["extra"]["database_type"] = db_type connection["port"] = port _conn = Connection(conn_id=connection_id, **connection) session.add(_conn) session.commit() log.info("Connection created: '%s'", connection_id) return connection_id @task_group(group_id=f"create_connections_{database_type}") def create_connections(instance: str, db_type: str, ip_address: str, port: str): for conn in CONNECTIONS: conn_id = f"{conn.id}_{database_type}" create_connection( connection_id=conn_id, instance=instance, db_type=db_type, ip_address=ip_address, port=port, kwargs=conn.kwargs, ) create_connections_task = create_connections( instance=cloud_sql_instance_name, db_type=database_type, ip_address=get_ip_address_task, port=db_provider["port"], ) @task_group(group_id=f"execute_queries_{database_type}") def execute_queries(db_type: str): prev_task = None for conn in CONNECTIONS: connection_id = f"{conn.id}_{db_type}" task_id = f"execute_query_{connection_id}" # [START howto_operator_cloudsql_query_operators] query_task = CloudSQLExecuteQueryOperator( gcp_cloudsql_conn_id=connection_id, task_id=task_id, sql=SQL, ) # [END howto_operator_cloudsql_query_operators] if prev_task: prev_task >> query_task prev_task = query_task execute_queries_task = execute_queries(db_type=database_type) @task_group(group_id=f"teardown_{database_type}") def teardown(instance: str, db_type: str): task_id = f"delete_cloud_sql_instance_{db_type}" CloudSQLDeleteInstanceOperator( task_id=task_id, project_id=PROJECT_ID, instance=instance, trigger_rule=TriggerRule.ALL_DONE, ) for conn in CONNECTIONS: connection_id = f"{conn.id}_{db_type}" BashOperator( task_id=f"delete_connection_{connection_id}", bash_command=DELETE_CONNECTION_COMMAND.format(connection_id), trigger_rule=TriggerRule.ALL_DONE, ) teardown_task = teardown(instance=cloud_sql_instance_name, db_type=database_type) ( # TEST SETUP get_composer_network >> create_cloud_sql_instance >> [ create_database, create_user_task, get_ip_address_task, ] >> create_connections_task # TEST BODY >> execute_queries_task # TEST TEARDOWN >> teardown_task ) # ### Everything below this line is not part of example ### # ### Just for system tests purpose ### from tests.system.utils.watcher import watcher # This test needs watcher in order to properly mark success/failure # when "tearDown" task with trigger rule is part of the DAG list(dag.tasks) >> watcher() from tests.system.utils import get_test_run # noqa: E402 # Needed to run the example DAG with pytest (see: tests/system/README.md#run_via_pytest)
[docs]test_run = get_test_run(dag)

Was this entry helpful?