# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import annotations
import asyncio
import warnings
from typing import Any, Sequence
import botocore.exceptions
from botocore.exceptions import ClientError
from airflow.providers.amazon.aws.hooks.base_aws import AwsBaseAsyncHook, AwsBaseHook


class RedshiftHook(AwsBaseHook):
"""
Interact with Amazon Redshift.
Provide thin wrapper around :external+boto3:py:class:`boto3.client("redshift") <Redshift.Client>`.
Additional arguments (such as ``aws_conn_id``) may be specified and
are passed down to the underlying AwsBaseHook.
.. seealso::
- :class:`airflow.providers.amazon.aws.hooks.base_aws.AwsBaseHook`
"""
    template_fields: Sequence[str] = ("cluster_identifier",)

def __init__(self, *args, **kwargs) -> None:
kwargs["client_type"] = "redshift"
super().__init__(*args, **kwargs)

    def create_cluster(
self,
cluster_identifier: str,
node_type: str,
master_username: str,
master_user_password: str,
params: dict[str, Any],
) -> dict[str, Any]:
"""
        Create a new cluster with the specified parameters.

        .. seealso::
            - :external+boto3:py:meth:`Redshift.Client.create_cluster`

:param cluster_identifier: A unique identifier for the cluster.
:param node_type: The node type to be provisioned for the cluster.
Valid Values: ``ds2.xlarge``, ``ds2.8xlarge``, ``dc1.large``,
``dc1.8xlarge``, ``dc2.large``, ``dc2.8xlarge``, ``ra3.xlplus``,
``ra3.4xlarge``, and ``ra3.16xlarge``.
:param master_username: The username associated with the admin user account
for the cluster that is being created.
        :param master_user_password: The password associated with the admin user account
for the cluster that is being created.
:param params: Remaining AWS Create cluster API params.
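
        Example (a minimal sketch; the connection id and all values shown are
        illustrative assumptions, not defaults of this hook):

        .. code-block:: python

            hook = RedshiftHook(aws_conn_id="aws_default")
            hook.create_cluster(
                cluster_identifier="my-cluster",
                node_type="dc2.large",
                master_username="admin",
                master_user_password="MyStrongPassword1",
                params={"NumberOfNodes": 2, "DBName": "dev"},
            )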
"""
try:
response = self.get_conn().create_cluster(
ClusterIdentifier=cluster_identifier,
NodeType=node_type,
MasterUsername=master_username,
MasterUserPassword=master_user_password,
**params,
)
return response
except ClientError as e:
raise e

    def cluster_status(self, cluster_identifier: str) -> str | None:
        """
        Return the status of a cluster.

        .. seealso::
            - :external+boto3:py:meth:`Redshift.Client.describe_clusters`

        :param cluster_identifier: unique identifier of a cluster
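
        Example (illustrative; the cluster name is an assumption):

        .. code-block:: python

            status = RedshiftHook().cluster_status("my-cluster")
            # e.g. "available", "paused", or "cluster_not_found"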
"""
try:
response = self.get_conn().describe_clusters(ClusterIdentifier=cluster_identifier)["Clusters"]
return response[0]["ClusterStatus"] if response else None
except self.get_conn().exceptions.ClusterNotFoundFault:
return "cluster_not_found"

    def delete_cluster(
self,
cluster_identifier: str,
skip_final_cluster_snapshot: bool = True,
final_cluster_snapshot_identifier: str | None = None,
):
"""
        Delete a cluster and optionally create a snapshot.

        .. seealso::
            - :external+boto3:py:meth:`Redshift.Client.delete_cluster`

:param cluster_identifier: unique identifier of a cluster
:param skip_final_cluster_snapshot: determines cluster snapshot creation
:param final_cluster_snapshot_identifier: name of final cluster snapshot
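
        Example (illustrative values; a final snapshot name is only needed
        when ``skip_final_cluster_snapshot`` is ``False``):

        .. code-block:: python

            RedshiftHook().delete_cluster(
                cluster_identifier="my-cluster",
                skip_final_cluster_snapshot=False,
                final_cluster_snapshot_identifier="my-cluster-final-snapshot",
            )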
"""
final_cluster_snapshot_identifier = final_cluster_snapshot_identifier or ""
response = self.get_conn().delete_cluster(
ClusterIdentifier=cluster_identifier,
SkipFinalClusterSnapshot=skip_final_cluster_snapshot,
FinalClusterSnapshotIdentifier=final_cluster_snapshot_identifier,
)
return response["Cluster"] if response["Cluster"] else None

    def describe_cluster_snapshots(self, cluster_identifier: str) -> list[dict[str, Any]] | None:
        """
        Get a list of snapshots for a cluster.

        .. seealso::
            - :external+boto3:py:meth:`Redshift.Client.describe_cluster_snapshots`

:param cluster_identifier: unique identifier of a cluster
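
        Example (illustrative; returns snapshot dictionaries, newest first):

        .. code-block:: python

            snapshots = RedshiftHook().describe_cluster_snapshots("my-cluster")
            if snapshots:
                latest = snapshots[0]["SnapshotIdentifier"]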
"""
response = self.get_conn().describe_cluster_snapshots(ClusterIdentifier=cluster_identifier)
if "Snapshots" not in response:
return None
snapshots = response["Snapshots"]
snapshots = [snapshot for snapshot in snapshots if snapshot["Status"]]
snapshots.sort(key=lambda x: x["SnapshotCreateTime"], reverse=True)
return snapshots

    def restore_from_cluster_snapshot(
        self, cluster_identifier: str, snapshot_identifier: str
    ) -> dict[str, Any] | None:
        """
        Restore a cluster from its snapshot.

        .. seealso::
            - :external+boto3:py:meth:`Redshift.Client.restore_from_cluster_snapshot`

:param cluster_identifier: unique identifier of a cluster
:param snapshot_identifier: unique identifier for a snapshot of a cluster
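
        Example (illustrative identifiers):

        .. code-block:: python

            cluster = RedshiftHook().restore_from_cluster_snapshot(
                cluster_identifier="my-restored-cluster",
                snapshot_identifier="my-cluster-snapshot",
            )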
"""
response = self.get_conn().restore_from_cluster_snapshot(
ClusterIdentifier=cluster_identifier, SnapshotIdentifier=snapshot_identifier
)
return response["Cluster"] if response["Cluster"] else None

    def create_cluster_snapshot(
        self, snapshot_identifier: str, cluster_identifier: str, retention_period: int = -1
    ) -> dict[str, Any] | None:
        """
        Create a snapshot of a cluster.

        .. seealso::
            - :external+boto3:py:meth:`Redshift.Client.create_cluster_snapshot`

:param snapshot_identifier: unique identifier for a snapshot of a cluster
:param cluster_identifier: unique identifier of a cluster
:param retention_period: The number of days that a manual snapshot is retained.
If the value is -1, the manual snapshot is retained indefinitely.
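
        Example (illustrative; keeps the manual snapshot for 7 days):

        .. code-block:: python

            RedshiftHook().create_cluster_snapshot(
                snapshot_identifier="my-cluster-snapshot",
                cluster_identifier="my-cluster",
                retention_period=7,
            )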
"""
response = self.get_conn().create_cluster_snapshot(
SnapshotIdentifier=snapshot_identifier,
ClusterIdentifier=cluster_identifier,
ManualSnapshotRetentionPeriod=retention_period,
)
return response["Snapshot"] if response["Snapshot"] else None

    def get_cluster_snapshot_status(self, snapshot_identifier: str, cluster_identifier: str | None = None):
        """
        Return Redshift cluster snapshot status. If the snapshot is not found, return ``None``.

:param snapshot_identifier: A unique identifier for the snapshot that you are requesting
:param cluster_identifier: (deprecated) The unique identifier of the cluster
the snapshot was created from
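
        Example (illustrative snapshot name):

        .. code-block:: python

            status = RedshiftHook().get_cluster_snapshot_status("my-cluster-snapshot")
            # e.g. "creating", "available", or None if the snapshot does not exist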
"""
if cluster_identifier:
warnings.warn(
"Parameter `cluster_identifier` is deprecated."
"This option will be removed in a future version.",
DeprecationWarning,
stacklevel=2,
)
try:
response = self.get_conn().describe_cluster_snapshots(
SnapshotIdentifier=snapshot_identifier,
)
snapshot = response.get("Snapshots")[0]
snapshot_status: str = snapshot.get("Status")
return snapshot_status
except self.get_conn().exceptions.ClusterSnapshotNotFoundFault:
return None


class RedshiftAsyncHook(AwsBaseAsyncHook):
    """Interact with AWS Redshift using the aiobotocore library."""

def __init__(self, *args: Any, **kwargs: Any) -> None:
kwargs["client_type"] = "redshift"
super().__init__(*args, **kwargs)

    async def cluster_status(self, cluster_identifier: str, delete_operation: bool = False) -> dict[str, Any]:
        """
        Connect to the AWS Redshift cluster via aiobotocore and return the status of the cluster
        for the given ``cluster_identifier``.

        :param cluster_identifier: unique identifier of a cluster
        :param delete_operation: whether the method has been called as part of a delete cluster operation
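
        Example (illustrative; run from synchronous code for demonstration):

        .. code-block:: python

            result = asyncio.run(RedshiftAsyncHook().cluster_status("my-cluster"))
            # e.g. {"status": "success", "cluster_state": "available"}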
"""
async with await self.get_client_async() as client:
try:
response = await client.describe_clusters(ClusterIdentifier=cluster_identifier)
cluster_state = (
response["Clusters"][0]["ClusterStatus"] if response and response["Clusters"] else None
)
return {"status": "success", "cluster_state": cluster_state}
except botocore.exceptions.ClientError as error:
if delete_operation and error.response.get("Error", {}).get("Code", "") == "ClusterNotFound":
return {"status": "success", "cluster_state": "cluster_not_found"}
return {"status": "error", "message": str(error)}

    async def pause_cluster(self, cluster_identifier: str, poll_interval: float = 5.0) -> dict[str, Any]:
        """
        Connect to the AWS Redshift cluster via aiobotocore and pause the cluster
        for the given ``cluster_identifier``.

        :param cluster_identifier: unique identifier of a cluster
        :param poll_interval: polling period in seconds to check for the status
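
        Example (illustrative; waits until the cluster reports ``paused``):

        .. code-block:: python

            result = asyncio.run(RedshiftAsyncHook().pause_cluster("my-cluster", poll_interval=10))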
"""
try:
async with await self.get_client_async() as client:
response = await client.pause_cluster(ClusterIdentifier=cluster_identifier)
status = response["Cluster"]["ClusterStatus"] if response and response["Cluster"] else None
if status == "pausing":
flag = asyncio.Event()
while True:
expected_response = await asyncio.create_task(
self.get_cluster_status(cluster_identifier, "paused", flag)
)
await asyncio.sleep(poll_interval)
if flag.is_set():
return expected_response
return {"status": "error", "cluster_state": status}
except botocore.exceptions.ClientError as error:
return {"status": "error", "message": str(error)}

    async def resume_cluster(
self,
cluster_identifier: str,
polling_period_seconds: float = 5.0,
) -> dict[str, Any]:
"""
        Connect to the AWS Redshift cluster via aiobotocore and resume the cluster
        for the given ``cluster_identifier``.

        :param cluster_identifier: unique identifier of a cluster
        :param polling_period_seconds: polling period in seconds to check for the status
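
        Example (illustrative; waits until the cluster reports ``available``):

        .. code-block:: python

            result = asyncio.run(
                RedshiftAsyncHook().resume_cluster("my-cluster", polling_period_seconds=10)
            )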
"""
async with await self.get_client_async() as client:
try:
response = await client.resume_cluster(ClusterIdentifier=cluster_identifier)
status = response["Cluster"]["ClusterStatus"] if response and response["Cluster"] else None
if status == "resuming":
flag = asyncio.Event()
while True:
expected_response = await asyncio.create_task(
self.get_cluster_status(cluster_identifier, "available", flag)
)
await asyncio.sleep(polling_period_seconds)
if flag.is_set():
return expected_response
return {"status": "error", "cluster_state": status}
except botocore.exceptions.ClientError as error:
return {"status": "error", "message": str(error)}

    async def get_cluster_status(
self,
cluster_identifier: str,
expected_state: str,
flag: asyncio.Event,
delete_operation: bool = False,
) -> dict[str, Any]:
"""
        Check for the expected Redshift cluster state.

        :param cluster_identifier: unique identifier of a cluster
        :param expected_state: expected cluster state, e.g. ``available``, ``pausing``, ``paused``
        :param flag: asyncio Event that is set when the expected state is reached or an error occurs
        :param delete_operation: whether the method has been called as part of a delete cluster operation
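
        Example (illustrative; intended to be awaited from async code):

        .. code-block:: python

            flag = asyncio.Event()
            result = await RedshiftAsyncHook().get_cluster_status("my-cluster", "available", flag)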
"""
try:
response = await self.cluster_status(cluster_identifier, delete_operation=delete_operation)
if ("cluster_state" in response and response["cluster_state"] == expected_state) or response[
"status"
] == "error":
flag.set()
return response
except botocore.exceptions.ClientError as error:
flag.set()
return {"status": "error", "message": str(error)}