Source code for airflow.providers.google.cloud.hooks.datapipeline

#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
"""This module contains a Google Data Pipelines Hook."""

from __future__ import annotations

from typing import Sequence

from googleapiclient.discovery import Resource, build

from airflow.providers.google.common.hooks.base_google import (
    GoogleBaseHook,
)

# Location used by the hook methods when the caller does not pass ``location``.
DEFAULT_DATAPIPELINE_LOCATION = "us-central1"
class DataPipelineHook(GoogleBaseHook):
    """
    Hook for Google Data Pipelines.

    All the methods in the hook where project_id is used must be called with
    keyword arguments rather than positional.

    :param gcp_conn_id: Connection ID forwarded to ``GoogleBaseHook``.
    :param impersonation_chain: Forwarded unchanged to ``GoogleBaseHook``.
    :param kwargs: Accepted but not forwarded to the base hook.
    """

    def __init__(
        self,
        gcp_conn_id: str = "google_cloud_default",
        impersonation_chain: str | Sequence[str] | None = None,
        **kwargs,
    ) -> None:
        # Only the two named arguments reach the base class; anything in
        # ``kwargs`` is dropped here.
        super().__init__(
            gcp_conn_id=gcp_conn_id,
            impersonation_chain=impersonation_chain,
        )

    def get_conn(self) -> Resource:
        """Return a Google Cloud Data Pipelines service object."""
        http_authorized = self._authorize()
        return build("datapipelines", "v1", http=http_authorized, cache_discovery=False)

    @GoogleBaseHook.fallback_to_default_project_id
    def create_data_pipeline(
        self,
        body: dict,
        project_id: str,
        location: str = DEFAULT_DATAPIPELINE_LOCATION,
    ) -> dict:
        """
        Create a new Data Pipelines instance from the Data Pipelines API.

        :param body: The request body (contains instance of Pipeline). See:
            https://cloud.google.com/dataflow/docs/reference/data-pipelines/rest/v1/projects.locations.pipelines/create#request-body
        :param project_id: The ID of the GCP project that owns the job.
        :param location: The location to direct the Data Pipelines instance to
            (for example us-central1).
        :return: The created Data Pipelines instance in JSON representation.
        """
        parent = self.build_parent_name(project_id, location)
        request = (
            self.get_conn()
            .projects()
            .locations()
            .pipelines()
            .create(
                parent=parent,
                body=body,
            )
        )
        return request.execute(num_retries=self.num_retries)

    @GoogleBaseHook.fallback_to_default_project_id
    def run_data_pipeline(
        self,
        data_pipeline_name: str,
        project_id: str,
        location: str = DEFAULT_DATAPIPELINE_LOCATION,
    ) -> dict:
        """
        Run a Data Pipelines Instance using the Data Pipelines API.

        :param data_pipeline_name: The display name of the pipeline. For example, in
            projects/PROJECT_ID/locations/LOCATION_ID/pipelines/PIPELINE_ID
            it would be the PIPELINE_ID.
        :param project_id: The ID of the GCP project that owns the job.
        :param location: The location to direct the Data Pipelines instance to
            (for example us-central1).
        :return: The created Job in JSON representation.
        """
        parent = self.build_parent_name(project_id, location)
        request = (
            self.get_conn()
            .projects()
            .locations()
            .pipelines()
            .run(
                name=f"{parent}/pipelines/{data_pipeline_name}",
                # The run call is issued with an empty request body.
                body={},
            )
        )
        return request.execute(num_retries=self.num_retries)

    @staticmethod
    def build_parent_name(project_id: str, location: str) -> str:
        """
        Build the parent resource path used by the create and run requests.

        :param project_id: The ID of the GCP project.
        :param location: The location segment of the path.
        :return: A string of the form ``projects/<project_id>/locations/<location>``.
        """
        return f"projects/{project_id}/locations/{location}"

Was this entry helpful?