Source code for airflow.providers.amazon.aws.hooks.redshift
## Licensed to the Apache Software Foundation (ASF) under one# or more contributor license agreements. See the NOTICE file# distributed with this work for additional information# regarding copyright ownership. The ASF licenses this file# to you under the Apache License, Version 2.0 (the# "License"); you may not use this file except in compliance# with the License. You may obtain a copy of the License at## http://www.apache.org/licenses/LICENSE-2.0## Unless required by applicable law or agreed to in writing,# software distributed under the License is distributed on an# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY# KIND, either express or implied. See the License for the# specific language governing permissions and limitations# under the License."""Interact with AWS Redshift clusters."""fromtypingimportDict,List,Optional,Uniontry:fromfunctoolsimportcached_propertyexceptImportError:fromcached_propertyimportcached_propertyimportredshift_connectorfromredshift_connectorimportConnectionasRedshiftConnectionfromsqlalchemyimportcreate_enginefromsqlalchemy.engine.urlimportURLfromairflow.hooks.dbapiimportDbApiHookfromairflow.providers.amazon.aws.hooks.base_awsimportAwsBaseHook
class RedshiftHook(AwsBaseHook):
    """
    Interact with AWS Redshift, using the boto3 library.

    Additional arguments (such as ``aws_conn_id``) may be specified and
    are passed down to the underlying AwsBaseHook.

    .. seealso::
        :class:`~airflow.providers.amazon.aws.hooks.base_aws.AwsBaseHook`

    :param aws_conn_id: The Airflow connection used for AWS credentials.
    :type aws_conn_id: str
    """

    def __init__(self, *args, **kwargs) -> None:
        # Force the underlying AwsBaseHook to create a boto3 "redshift" client.
        kwargs["client_type"] = "redshift"
        super().__init__(*args, **kwargs)

    def cluster_status(self, cluster_identifier: str) -> Optional[str]:
        """
        Return the status of a cluster.

        :param cluster_identifier: unique identifier of a cluster
        :type cluster_identifier: str
        :return: the cluster's ``ClusterStatus`` string, ``None`` when the
            describe call returns an empty cluster list, or the literal
            ``'cluster_not_found'`` when the cluster does not exist
        :rtype: Optional[str]
        """
        conn = self.get_conn()
        try:
            response = conn.describe_clusters(ClusterIdentifier=cluster_identifier)['Clusters']
            return response[0]['ClusterStatus'] if response else None
        except conn.exceptions.ClusterNotFoundFault:
            return 'cluster_not_found'

    def delete_cluster(
        self,
        cluster_identifier: str,
        skip_final_cluster_snapshot: bool = True,
        final_cluster_snapshot_identifier: Optional[str] = None,
    ):
        """
        Delete a cluster and optionally create a snapshot.

        :param cluster_identifier: unique identifier of a cluster
        :type cluster_identifier: str
        :param skip_final_cluster_snapshot: determines cluster snapshot creation
        :type skip_final_cluster_snapshot: bool
        :param final_cluster_snapshot_identifier: name of final cluster snapshot
        :type final_cluster_snapshot_identifier: Optional[str]
        :return: the deleted cluster description, or ``None`` when the
            response carries a falsy ``'Cluster'`` value
        """
        # The API parameter must be a string; map None to the empty string.
        final_cluster_snapshot_identifier = final_cluster_snapshot_identifier or ''
        response = self.get_conn().delete_cluster(
            ClusterIdentifier=cluster_identifier,
            SkipFinalClusterSnapshot=skip_final_cluster_snapshot,
            FinalClusterSnapshotIdentifier=final_cluster_snapshot_identifier,
        )
        return response['Cluster'] if response['Cluster'] else None

    def describe_cluster_snapshots(self, cluster_identifier: str) -> Optional[List[Dict]]:
        """
        Get a list of snapshots for a cluster, most recent first.

        :param cluster_identifier: unique identifier of a cluster
        :type cluster_identifier: str
        :return: snapshot descriptions sorted by ``SnapshotCreateTime``
            descending, or ``None`` when the response has no ``'Snapshots'``
            key
        """
        response = self.get_conn().describe_cluster_snapshots(ClusterIdentifier=cluster_identifier)
        if 'Snapshots' not in response:
            return None
        # Drop snapshots whose Status field is falsy (e.g. empty string).
        snapshots = [snapshot for snapshot in response['Snapshots'] if snapshot["Status"]]
        snapshots.sort(key=lambda x: x['SnapshotCreateTime'], reverse=True)
        return snapshots

    def restore_from_cluster_snapshot(self, cluster_identifier: str, snapshot_identifier: str) -> Optional[Dict]:
        """
        Restore a cluster from its snapshot.

        :param cluster_identifier: unique identifier of a cluster
        :type cluster_identifier: str
        :param snapshot_identifier: unique identifier for a snapshot of a cluster
        :type snapshot_identifier: str
        :return: the restored cluster description, or ``None`` when the
            response carries a falsy ``'Cluster'`` value
        """
        response = self.get_conn().restore_from_cluster_snapshot(
            ClusterIdentifier=cluster_identifier, SnapshotIdentifier=snapshot_identifier
        )
        return response['Cluster'] if response['Cluster'] else None

    def create_cluster_snapshot(self, snapshot_identifier: str, cluster_identifier: str) -> Optional[Dict]:
        """
        Create a snapshot of a cluster.

        :param snapshot_identifier: unique identifier for a snapshot of a cluster
        :type snapshot_identifier: str
        :param cluster_identifier: unique identifier of a cluster
        :type cluster_identifier: str
        :return: the snapshot description, or ``None`` when the response
            carries a falsy ``'Snapshot'`` value
        """
        response = self.get_conn().create_cluster_snapshot(
            SnapshotIdentifier=snapshot_identifier,
            ClusterIdentifier=cluster_identifier,
        )
        return response['Snapshot'] if response['Snapshot'] else None
class RedshiftSQLHook(DbApiHook):
    """
    Execute statements against Amazon Redshift, using redshift_connector.

    This hook requires the redshift_conn_id connection.

    :param redshift_conn_id: reference to
        :ref:`Amazon Redshift connection id<howto/connection:redshift>`
    :type redshift_conn_id: str

    .. note::
        get_sqlalchemy_engine() and get_uri() depend on sqlalchemy-amazon-redshift
    """

    def _get_conn_params(self) -> Dict[str, Union[str, int]]:
        """Helper method to retrieve connection args from the Airflow connection."""
        conn = self.conn
        conn_params: Dict[str, Union[str, int]] = {}
        # Forward only the fields actually set on the connection.
        if conn.login:
            conn_params['user'] = conn.login
        if conn.password:
            conn_params['password'] = conn.password
        if conn.host:
            conn_params['host'] = conn.host
        if conn.port:
            conn_params['port'] = conn.port
        if conn.schema:
            # Airflow's "schema" field holds the Redshift database name.
            conn_params['database'] = conn.schema
        return conn_params

    def get_uri(self) -> str:
        """
        Overrides DbApiHook get_uri to use the redshift_connector sqlalchemy
        dialect as driver name.
        """
        conn_params = self._get_conn_params()
        # sqlalchemy's URL expects "username" where redshift_connector uses "user".
        if 'user' in conn_params:
            conn_params['username'] = conn_params.pop('user')
        return str(URL(drivername='redshift+redshift_connector', **conn_params))

    def get_sqlalchemy_engine(self, engine_kwargs=None):
        """
        Overrides DbApiHook get_sqlalchemy_engine to pass redshift_connector
        specific kwargs via ``connect_args``.
        """
        conn_kwargs = self.conn.extra_dejson
        if engine_kwargs is None:
            engine_kwargs = {}
        if "connect_args" in engine_kwargs:
            # Caller-supplied connect_args take precedence over connection extras.
            engine_kwargs["connect_args"] = {**conn_kwargs, **engine_kwargs["connect_args"]}
        else:
            engine_kwargs["connect_args"] = conn_kwargs
        return create_engine(self.get_uri(), **engine_kwargs)

    def get_table_primary_key(self, table: str, schema: Optional[str] = "public") -> Optional[List[str]]:
        """
        Helper method that returns the table primary key.

        :param table: Name of the target table
        :type table: str
        :param schema: Name of the target schema, public by default
        :type schema: str
        :return: Primary key columns list, or ``None`` when the table has no
            primary key
        :rtype: Optional[List[str]]
        """
        sql = """
            select kcu.column_name
            from information_schema.table_constraints tco
                join information_schema.key_column_usage kcu
                    on kcu.constraint_name = tco.constraint_name
                        and kcu.constraint_schema = tco.constraint_schema
                        and kcu.constraint_name = tco.constraint_name
            where tco.constraint_type = 'PRIMARY KEY'
                and kcu.table_schema = %s
                and kcu.table_name = %s
        """
        pk_columns = [row[0] for row in self.get_records(sql, (schema, table))]
        return pk_columns or None

    def get_conn(self) -> RedshiftConnection:
        """Returns a redshift_connector.Connection object."""
        conn_params = self._get_conn_params()
        # Connection extras override/extend the core connection parameters.
        conn_kwargs: Dict = {**conn_params, **self.conn.extra_dejson}
        conn: RedshiftConnection = redshift_connector.connect(**conn_kwargs)
        return conn