Source code for airflow.providers.amazon.aws.hooks.redshift_sql
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import annotations

from functools import cached_property
from typing import TYPE_CHECKING

import redshift_connector
from redshift_connector import Connection as RedshiftConnection
from sqlalchemy import create_engine
from sqlalchemy.engine.url import URL

from airflow.exceptions import AirflowException
from airflow.providers.amazon.aws.hooks.base_aws import AwsBaseHook
from airflow.providers.common.sql.hooks.sql import DbApiHook

if TYPE_CHECKING:
    from airflow.models.connection import Connection
class RedshiftSQLHook(DbApiHook):
    """Execute statements against Amazon Redshift.

    This hook requires the redshift_conn_id connection.

    Note: For AWS IAM authentication, use iam in the extra connection parameters
    and set it to true. Leave the password field empty. This will use the
    "aws_default" connection to get the temporary token unless you override
    with aws_conn_id when initializing the hook.
    The cluster-identifier is extracted from the beginning of the host field,
    so it is optional. It can, however, be overridden in the extra field.
    extras example: ``{"iam":true}``

    :param redshift_conn_id: reference to
        :ref:`Amazon Redshift connection id<howto/connection:redshift>`

    .. note::
        get_sqlalchemy_engine() and get_uri() depend on sqlalchemy-amazon-redshift
    """
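For the IAM flow described in the docstring, a minimal sketch of wiring this up; the connection id, the extras, and the query below are illustrative assumptions, not part of this module:

from airflow.providers.amazon.aws.hooks.redshift_sql import RedshiftSQLHook

# Assumes an Airflow connection "redshift_default" whose extras contain
# {"iam": true} and whose password field is left empty.
hook = RedshiftSQLHook(redshift_conn_id="redshift_default", aws_conn_id="aws_default")
rows = hook.get_records("SELECT 1")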
    def get_iam_token(self, conn: Connection) -> tuple[str, str, int]:
        """Retrieve a temporary password to connect to Redshift.

        Port is required. If none is provided, the default is used for each service.
        """
        port = conn.port or 5439
        # Pull the cluster-identifier from the beginning of the Redshift URL
        # ex. my-cluster.ccdre4hpd39h.us-east-1.redshift.amazonaws.com returns my-cluster
        cluster_identifier = conn.extra_dejson.get("cluster_identifier")
        if not cluster_identifier:
            if conn.host:
                cluster_identifier = conn.host.split(".", 1)[0]
            else:
                raise AirflowException("Please set cluster_identifier or host in redshift connection.")
        redshift_client = AwsBaseHook(aws_conn_id=self.aws_conn_id, client_type="redshift").conn
        # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/redshift.html#Redshift.Client.get_cluster_credentials
        cluster_creds = redshift_client.get_cluster_credentials(
            DbUser=conn.login,
            DbName=conn.schema,
            ClusterIdentifier=cluster_identifier,
            AutoCreate=False,
        )
        token = cluster_creds["DbPassword"]
        login = cluster_creds["DbUser"]
        return login, token, port
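To illustrate the host parsing above: the cluster identifier is simply everything before the first dot of the host field (the endpoint below is made up, mirroring the comment in get_iam_token):

host = "my-cluster.ccdre4hpd39h.us-east-1.redshift.amazonaws.com"
cluster_identifier = host.split(".", 1)[0]
assert cluster_identifier == "my-cluster"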
    def get_uri(self) -> str:
        """Overridden to use the Redshift dialect as driver name."""
        conn_params = self._get_conn_params()

        if "user" in conn_params:
            conn_params["username"] = conn_params.pop("user")

        # Compatibility: The 'create' factory method was added in SQLAlchemy 1.4
        # to replace calling the default URL constructor directly.
        create_url = getattr(URL, "create", URL)
        return str(create_url(drivername="redshift+redshift_connector", **conn_params))
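As a rough sketch of the URI this produces, assuming SQLAlchemy 1.4+ and made-up credentials:

from sqlalchemy.engine.url import URL

uri = URL.create(
    drivername="redshift+redshift_connector",
    username="awsuser",
    password="secret",
    host="my-cluster.ccdre4hpd39h.us-east-1.redshift.amazonaws.com",
    port=5439,
    database="dev",
)
print(uri)
# redshift+redshift_connector://awsuser:secret@my-cluster.ccdre4hpd39h.us-east-1.redshift.amazonaws.com:5439/dev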
    def get_sqlalchemy_engine(self, engine_kwargs=None):
        """Overridden to pass Redshift-specific arguments."""
        conn_kwargs = self.conn.extra_dejson
        if engine_kwargs is None:
            engine_kwargs = {}

        if "connect_args" in engine_kwargs:
            engine_kwargs["connect_args"] = {**conn_kwargs, **engine_kwargs["connect_args"]}
        else:
            engine_kwargs["connect_args"] = conn_kwargs

        return create_engine(self.get_uri(), **engine_kwargs)
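A sketch of passing engine kwargs: any connect_args given here are merged with the connection's extras by the method above. The connection id and the ssl flag are assumptions for illustration:

from airflow.providers.amazon.aws.hooks.redshift_sql import RedshiftSQLHook

hook = RedshiftSQLHook(redshift_conn_id="redshift_default")
# pool_pre_ping is a standard SQLAlchemy create_engine() option; ssl is assumed
# here as a redshift_connector connect argument.
engine = hook.get_sqlalchemy_engine(
    engine_kwargs={"pool_pre_ping": True, "connect_args": {"ssl": True}}
)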
    def get_table_primary_key(self, table: str, schema: str | None = "public") -> list[str] | None:
        """Get the table's primary key.

        :param table: Name of the target table
        :param schema: Name of the target schema, public by default
        :return: Primary key columns list
        """
        sql = """
            select kcu.column_name
            from information_schema.table_constraints tco
                join information_schema.key_column_usage kcu
                    on kcu.constraint_name = tco.constraint_name
                        and kcu.constraint_schema = tco.constraint_schema
                        and kcu.constraint_name = tco.constraint_name
            where tco.constraint_type = 'PRIMARY KEY'
                and kcu.table_schema = %s
                and kcu.table_name = %s
        """
        pk_columns = [row[0] for row in self.get_records(sql, (schema, table))]
        return pk_columns or None
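Example usage; the connection id, table, and schema names are hypothetical:

from airflow.providers.amazon.aws.hooks.redshift_sql import RedshiftSQLHook

hook = RedshiftSQLHook(redshift_conn_id="redshift_default")
pk_cols = hook.get_table_primary_key(table="orders", schema="public")
# e.g. ["order_id"], or None if the table has no primary key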
    def get_conn(self) -> RedshiftConnection:
        """Get a ``redshift_connector.Connection`` object."""
        conn_params = self._get_conn_params()
        conn_kwargs_dejson = self.conn.extra_dejson
        conn_kwargs: dict = {**conn_params, **conn_kwargs_dejson}
        return redshift_connector.connect(**conn_kwargs)
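A sketch of working with the raw redshift_connector connection returned above, using plain DB-API calls; the connection id and the query are illustrative:

from airflow.providers.amazon.aws.hooks.redshift_sql import RedshiftSQLHook

hook = RedshiftSQLHook(redshift_conn_id="redshift_default")
conn = hook.get_conn()
try:
    cursor = conn.cursor()
    cursor.execute("SELECT current_database()")
    print(cursor.fetchone())
finally:
    conn.close()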