# -*- coding: utf-8 -*-
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""
This module contains a CloudDLPHook
which allows you to connect to GCP Cloud DLP service.
"""
import re
import time
from google.cloud.dlp_v2 import DlpServiceClient
from google.cloud.dlp_v2.types import DlpJob
from airflow import AirflowException
from airflow.contrib.hooks.gcp_api_base_hook import GoogleCloudBaseHook
# Matches a project-scoped DLP job resource name and captures the trailing
# job ID in the named group ``job``.
DLP_JOB_PATH_PATTERN = r'^projects/[^/]+/dlpJobs/(?P<job>.*?)$'
# Time to sleep between active checks of the operation results
TIME_TO_SLEEP_IN_SECONDS = 1
# pylint: disable=R0904, C0302
[docs]class CloudDLPHook(GoogleCloudBaseHook):
"""
Hook for Google Cloud Data Loss Prevention (DLP) APIs.
Cloud DLP allows clients to detect the presence of Personally Identifiable
Information (PII) and other privacy-sensitive data in user-supplied,
unstructured data streams, like text blocks or images. The service also
includes methods for sensitive data redaction and scheduling of data scans
on Google Cloud Platform based data sets.
:param gcp_conn_id: The connection ID to use when fetching connection info.
:type gcp_conn_id: str
:param delegate_to: The account to impersonate, if any.
For this to work, the service account making the request must have
domain-wide delegation enabled.
:type delegate_to: str
"""
def __init__(self, gcp_conn_id="google_cloud_default", delegate_to=None):
    """
    Initialise the base hook and start without a cached API client.

    :param gcp_conn_id: The connection ID to use when fetching connection info.
    :type gcp_conn_id: str
    :param delegate_to: The account to impersonate, if any.
    :type delegate_to: str
    """
    super(CloudDLPHook, self).__init__(gcp_conn_id, delegate_to)
    # No client yet; get_conn() creates and stores one on first use.
    self._client = None
def get_conn(self):
    """
    Return the Cloud DLP API client, creating it on the first call.

    The client is built with the hook's credentials and kept on the
    instance, so subsequent calls return the same object.

    :return: GCP Cloud DLP API Client
    :rtype: google.cloud.dlp_v2.DlpServiceClient
    """
    if self._client:
        return self._client

    credentials = self._get_credentials()
    self._client = DlpServiceClient(credentials=credentials)
    return self._client
@GoogleCloudBaseHook.catch_http_exception
@GoogleCloudBaseHook.fallback_to_default_project_id
def cancel_dlp_job(
    self, dlp_job_id, project_id=None, retry=None, timeout=None, metadata=None
):
    """
    Request asynchronous cancellation of a long-running DLP job.

    :param dlp_job_id: ID of the DLP job resource to be cancelled.
    :type dlp_job_id: str
    :param project_id: (Optional) Google Cloud Platform project ID where the
        DLP Instance exists. If set to None or missing, the default project_id
        from the GCP connection is used.
    :type project_id: str
    :param retry: (Optional) A retry object used to retry requests.
        If None is specified, requests will not be retried.
    :type retry: google.api_core.retry.Retry
    :param timeout: (Optional) The amount of time, in seconds, to wait for the
        request to complete. Note that if retry is specified, the timeout
        applies to each individual attempt.
    :type timeout: float
    :param metadata: (Optional) Additional metadata that is provided to the method.
    :type metadata: sequence[tuple[str, str]]
    :raises AirflowException: if ``dlp_job_id`` is empty.
    """
    dlp_client = self.get_conn()

    if dlp_job_id:
        job_path = DlpServiceClient.dlp_job_path(project_id, dlp_job_id)
        dlp_client.cancel_dlp_job(
            name=job_path, retry=retry, timeout=timeout, metadata=metadata
        )
        return

    raise AirflowException(
        "Please provide the ID of the DLP job resource to be cancelled."
    )
@GoogleCloudBaseHook.catch_http_exception
def create_deidentify_template(
    self,
    organization_id=None,
    project_id=None,
    deidentify_template=None,
    template_id=None,
    retry=None,
    timeout=None,
    metadata=None,
):
    """
    Create a deidentify template so that frequently used configuration for
    de-identifying content, images, and storage can be re-used.

    The parent resource is the organization when ``organization_id`` is
    given; otherwise it is the project (``project_id``, or the hook's own
    ``project_id`` when the argument is empty).

    :param organization_id: (Optional) The organization ID. Required to set this
        field if parent resource is an organization.
    :type organization_id: str
    :param project_id: (Optional) Google Cloud Platform project ID where the
        DLP Instance exists. Used only when no organization_id is given.
    :type project_id: str
    :param deidentify_template: (Optional) The deidentify template to create.
    :type deidentify_template: dict or google.cloud.dlp_v2.types.DeidentifyTemplate
    :param template_id: (Optional) The template ID.
    :type template_id: str
    :param retry: (Optional) A retry object used to retry requests.
        If None is specified, requests will not be retried.
    :type retry: google.api_core.retry.Retry
    :param timeout: (Optional) The amount of time, in seconds, to wait for the
        request to complete. Note that if retry is specified, the timeout
        applies to each individual attempt.
    :type timeout: float
    :param metadata: (Optional) Additional metadata that is provided to the method.
    :type metadata: sequence[tuple[str, str]]
    :rtype: google.cloud.dlp_v2.types.DeidentifyTemplate
    :raises AirflowException: if neither an organization nor a project is available.
    """
    dlp_client = self.get_conn()

    # Fall back to the project configured on the connection.
    project_id = project_id or self.project_id

    if not organization_id and not project_id:
        raise AirflowException(
            "Please provide either organization_id or project_id."
        )

    # The organization wins when both identifiers are available.
    if organization_id:
        parent_path = DlpServiceClient.organization_path(organization_id)
    else:
        parent_path = DlpServiceClient.project_path(project_id)

    created = dlp_client.create_deidentify_template(
        parent=parent_path,
        deidentify_template=deidentify_template,
        template_id=template_id,
        retry=retry,
        timeout=timeout,
        metadata=metadata,
    )
    return created
@GoogleCloudBaseHook.catch_http_exception
@GoogleCloudBaseHook.fallback_to_default_project_id
def create_dlp_job(
    self,
    project_id=None,
    inspect_job=None,
    risk_job=None,
    job_id=None,
    retry=None,
    timeout=None,
    metadata=None,
    wait_until_finished=True
):
    """
    Creates a new job to inspect storage or calculate risk metrics.

    When ``wait_until_finished`` is true the job is polled every
    ``TIME_TO_SLEEP_IN_SECONDS`` seconds until it reaches the DONE state.

    :param project_id: (Optional) Google Cloud Platform project ID where the
        DLP Instance exists. If set to None or missing, the default
        project_id from the GCP connection is used.
    :type project_id: str
    :param inspect_job: (Optional) The configuration for the inspect job.
    :type inspect_job: dict or google.cloud.dlp_v2.types.InspectJobConfig
    :param risk_job: (Optional) The configuration for the risk job.
    :type risk_job: dict or google.cloud.dlp_v2.types.RiskAnalysisJobConfig
    :param job_id: (Optional) The job ID.
    :type job_id: str
    :param retry: (Optional) A retry object used to retry requests.
        If None is specified, requests will not be retried.
    :type retry: google.api_core.retry.Retry
    :param timeout: (Optional) The amount of time, in seconds, to wait for the
        request to complete. Note that if retry is specified, the timeout
        applies to each individual attempt.
    :type timeout: float
    :param metadata: (Optional) Additional metadata that is provided to the method.
    :type metadata: sequence[tuple[str, str]]
    :param wait_until_finished: (Optional) If true, it will keep polling the job state
        until it is set to DONE.
    :type wait_until_finished: bool
    :rtype: google.cloud.dlp_v2.types.DlpJob
    :raises AirflowException: if the job ID cannot be extracted from the
        created job's name, or if polling observes a state other than DONE,
        PENDING, RUNNING or JOB_STATE_UNSPECIFIED.
    """
    client = self.get_conn()

    parent = DlpServiceClient.project_path(project_id)
    job = client.create_dlp_job(
        parent=parent,
        inspect_job=inspect_job,
        risk_job=risk_job,
        job_id=job_id,
        retry=retry,
        timeout=timeout,
        metadata=metadata,
    )

    if not wait_until_finished:
        return job

    # get_dlp_job() expects the bare job ID, so strip the resource-name prefix.
    match = re.match(DLP_JOB_PATH_PATTERN, job.name, re.IGNORECASE)
    if match is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise AirflowException(
            "Unable to retrieve DLP job's ID from {}.".format(job.name)
        )
    job_name = match.group('job')

    # States in which the job has not finished yet and polling continues.
    unfinished_states = (
        DlpJob.JobState.PENDING,
        DlpJob.JobState.RUNNING,
        DlpJob.JobState.JOB_STATE_UNSPECIFIED,
    )

    while True:
        job = self.get_dlp_job(dlp_job_id=job_name, project_id=project_id)
        state_name = DlpJob.JobState.Name(job.state)
        self.log.info('DLP job %s state: %s.', job.name, state_name)

        if job.state == DlpJob.JobState.DONE:
            return job
        if job.state not in unfinished_states:
            # Any other state means the job will never reach DONE.
            raise AirflowException(
                'Stopped polling DLP job state. DLP job {} state: {}.'
                .format(job.name, state_name)
            )
        time.sleep(TIME_TO_SLEEP_IN_SECONDS)
@GoogleCloudBaseHook.catch_http_exception
def create_inspect_template(
    self,
    organization_id=None,
    project_id=None,
    inspect_template=None,
    template_id=None,
    retry=None,
    timeout=None,
    metadata=None,
):
    """
    Create an inspect template so that frequently used configuration for
    inspecting content, images, and storage can be re-used.

    The parent resource is the organization when ``organization_id`` is
    given; otherwise it is the project (``project_id``, or the hook's own
    ``project_id`` when the argument is empty).

    :param organization_id: (Optional) The organization ID. Required to set this
        field if parent resource is an organization.
    :type organization_id: str
    :param project_id: (Optional) Google Cloud Platform project ID where the
        DLP Instance exists. Used only when no organization_id is given.
    :type project_id: str
    :param inspect_template: (Optional) The inspect template to create.
    :type inspect_template: dict or google.cloud.dlp_v2.types.InspectTemplate
    :param template_id: (Optional) The template ID.
    :type template_id: str
    :param retry: (Optional) A retry object used to retry requests.
        If None is specified, requests will not be retried.
    :type retry: google.api_core.retry.Retry
    :param timeout: (Optional) The amount of time, in seconds, to wait for the
        request to complete. Note that if retry is specified, the timeout
        applies to each individual attempt.
    :type timeout: float
    :param metadata: (Optional) Additional metadata that is provided to the method.
    :type metadata: sequence[tuple[str, str]]
    :rtype: google.cloud.dlp_v2.types.InspectTemplate
    :raises AirflowException: if neither an organization nor a project is available.
    """
    dlp_client = self.get_conn()

    # Fall back to the project configured on the connection.
    project_id = project_id or self.project_id

    if not (organization_id or project_id):
        raise AirflowException(
            "Please provide either organization_id or project_id."
        )

    # The organization wins when both identifiers are available.
    parent_path = (
        DlpServiceClient.organization_path(organization_id)
        if organization_id
        else DlpServiceClient.project_path(project_id)
    )

    return dlp_client.create_inspect_template(
        parent=parent_path,
        inspect_template=inspect_template,
        template_id=template_id,
        retry=retry,
        timeout=timeout,
        metadata=metadata,
    )
@GoogleCloudBaseHook.catch_http_exception
@GoogleCloudBaseHook.fallback_to_default_project_id
def create_job_trigger(
    self,
    project_id=None,
    job_trigger=None,
    trigger_id=None,
    retry=None,
    timeout=None,
    metadata=None,
):
    """
    Create a job trigger that runs DLP actions, such as scanning storage for
    sensitive information, on a set schedule.

    :param project_id: (Optional) Google Cloud Platform project ID where the
        DLP Instance exists. If set to None or missing, the default
        project_id from the GCP connection is used.
    :type project_id: str
    :param job_trigger: (Optional) The job trigger to create.
    :type job_trigger: dict or google.cloud.dlp_v2.types.JobTrigger
    :param trigger_id: (Optional) The job trigger ID.
    :type trigger_id: str
    :param retry: (Optional) A retry object used to retry requests.
        If None is specified, requests will not be retried.
    :type retry: google.api_core.retry.Retry
    :param timeout: (Optional) The amount of time, in seconds, to wait for the
        request to complete. Note that if retry is specified, the timeout
        applies to each individual attempt.
    :type timeout: float
    :param metadata: (Optional) Additional metadata that is provided to the method.
    :type metadata: sequence[tuple[str, str]]
    :rtype: google.cloud.dlp_v2.types.JobTrigger
    """
    dlp_client = self.get_conn()
    project_path = DlpServiceClient.project_path(project_id)

    created_trigger = dlp_client.create_job_trigger(
        parent=project_path,
        job_trigger=job_trigger,
        trigger_id=trigger_id,
        retry=retry,
        timeout=timeout,
        metadata=metadata,
    )
    return created_trigger
@GoogleCloudBaseHook.catch_http_exception
def create_stored_info_type(
    self,
    organization_id=None,
    project_id=None,
    config=None,
    stored_info_type_id=None,
    retry=None,
    timeout=None,
    metadata=None,
):
    """
    Create a pre-built stored info type to be used for inspection.

    The parent resource is the organization when ``organization_id`` is
    given; otherwise it is the project (``project_id``, or the hook's own
    ``project_id`` when the argument is empty).

    :param organization_id: (Optional) The organization ID. Required to set this
        field if parent resource is an organization.
    :type organization_id: str
    :param project_id: (Optional) Google Cloud Platform project ID where the
        DLP Instance exists. Used only when no organization_id is given.
    :type project_id: str
    :param config: (Optional) The config for the stored info type.
    :type config: dict or google.cloud.dlp_v2.types.StoredInfoTypeConfig
    :param stored_info_type_id: (Optional) The stored info type ID.
    :type stored_info_type_id: str
    :param retry: (Optional) A retry object used to retry requests.
        If None is specified, requests will not be retried.
    :type retry: google.api_core.retry.Retry
    :param timeout: (Optional) The amount of time, in seconds, to wait for the
        request to complete. Note that if retry is specified, the timeout
        applies to each individual attempt.
    :type timeout: float
    :param metadata: (Optional) Additional metadata that is provided to the method.
    :type metadata: sequence[tuple[str, str]]
    :rtype: google.cloud.dlp_v2.types.StoredInfoType
    :raises AirflowException: if neither an organization nor a project is available.
    """
    dlp_client = self.get_conn()

    # Fall back to the project configured on the connection.
    project_id = project_id or self.project_id

    if not organization_id and not project_id:
        raise AirflowException(
            "Please provide either organization_id or project_id."
        )

    # The organization wins when both identifiers are available.
    if organization_id:
        parent_path = DlpServiceClient.organization_path(organization_id)
    else:
        parent_path = DlpServiceClient.project_path(project_id)

    return dlp_client.create_stored_info_type(
        parent=parent_path,
        config=config,
        stored_info_type_id=stored_info_type_id,
        retry=retry,
        timeout=timeout,
        metadata=metadata,
    )
@GoogleCloudBaseHook.catch_http_exception
@GoogleCloudBaseHook.fallback_to_default_project_id
def deidentify_content(
    self,
    project_id=None,
    deidentify_config=None,
    inspect_config=None,
    item=None,
    inspect_template_name=None,
    deidentify_template_name=None,
    retry=None,
    timeout=None,
    metadata=None,
):
    """
    De-identify potentially sensitive info in a content item. This method has
    limits on input size and output size.

    :param project_id: (Optional) Google Cloud Platform project ID where the
        DLP Instance exists. If set to None or missing, the default
        project_id from the GCP connection is used.
    :type project_id: str
    :param deidentify_config: (Optional) Configuration for the de-identification of the
        content item. Items specified here will override the template referenced by the
        deidentify_template_name argument.
    :type deidentify_config: dict or google.cloud.dlp_v2.types.DeidentifyConfig
    :param inspect_config: (Optional) Configuration for the inspector. Items specified
        here will override the template referenced by the inspect_template_name argument.
    :type inspect_config: dict or google.cloud.dlp_v2.types.InspectConfig
    :param item: (Optional) The item to de-identify. Will be treated as text.
    :type item: dict or google.cloud.dlp_v2.types.ContentItem
    :param inspect_template_name: (Optional) Template to use. Any configuration
        directly specified in inspect_config will override those set in the template.
    :type inspect_template_name: str
    :param deidentify_template_name: (Optional) Template to use. Any
        configuration directly specified in deidentify_config will override those set
        in the template.
    :type deidentify_template_name: str
    :param retry: (Optional) A retry object used to retry requests.
        If None is specified, requests will not be retried.
    :type retry: google.api_core.retry.Retry
    :param timeout: (Optional) The amount of time, in seconds, to wait for the
        request to complete. Note that if retry is specified, the timeout
        applies to each individual attempt.
    :type timeout: float
    :param metadata: (Optional) Additional metadata that is provided to the method.
    :type metadata: sequence[tuple[str, str]]
    :rtype: google.cloud.dlp_v2.types.DeidentifyContentResponse
    """
    dlp_client = self.get_conn()
    project_path = DlpServiceClient.project_path(project_id)

    response = dlp_client.deidentify_content(
        parent=project_path,
        deidentify_config=deidentify_config,
        inspect_config=inspect_config,
        item=item,
        inspect_template_name=inspect_template_name,
        deidentify_template_name=deidentify_template_name,
        retry=retry,
        timeout=timeout,
        metadata=metadata,
    )
    return response
@GoogleCloudBaseHook.catch_http_exception
def delete_deidentify_template(
    self,
    template_id,
    organization_id=None,
    project_id=None,
    retry=None,
    timeout=None,
    metadata=None
):
    """
    Delete a deidentify template.

    The template is looked up under the organization when
    ``organization_id`` is given; otherwise under the project
    (``project_id``, or the hook's own ``project_id`` when the argument is
    empty).

    :param template_id: The ID of deidentify template to be deleted.
    :type template_id: str
    :param organization_id: (Optional) The organization ID. Required to set this
        field if parent resource is an organization.
    :type organization_id: str
    :param project_id: (Optional) Google Cloud Platform project ID where the
        DLP Instance exists. Used only when no organization_id is given.
    :type project_id: str
    :param retry: (Optional) A retry object used to retry requests.
        If None is specified, requests will not be retried.
    :type retry: google.api_core.retry.Retry
    :param timeout: (Optional) The amount of time, in seconds, to wait for the
        request to complete. Note that if retry is specified, the timeout
        applies to each individual attempt.
    :type timeout: float
    :param metadata: (Optional) Additional metadata that is provided to the method.
    :type metadata: sequence[tuple[str, str]]
    :raises AirflowException: if ``template_id`` is empty, or if neither an
        organization nor a project is available.
    """
    dlp_client = self.get_conn()

    if not template_id:
        raise AirflowException(
            "Please provide the ID of deidentify template to be deleted."
        )

    # Fall back to the project configured on the connection.
    project_id = project_id or self.project_id

    if not organization_id and not project_id:
        raise AirflowException(
            "Please provide either organization_id or project_id."
        )

    # The organization wins when both identifiers are available.
    if organization_id:
        template_path = DlpServiceClient.organization_deidentify_template_path(
            organization_id, template_id
        )
    else:
        template_path = DlpServiceClient.project_deidentify_template_path(
            project_id, template_id
        )

    dlp_client.delete_deidentify_template(
        name=template_path, retry=retry, timeout=timeout, metadata=metadata
    )
@GoogleCloudBaseHook.catch_http_exception
@GoogleCloudBaseHook.fallback_to_default_project_id
def delete_dlp_job(
    self, dlp_job_id, project_id=None, retry=None, timeout=None, metadata=None
):
    """
    Deletes a long-running DLP job. This method indicates that the client is no longer
    interested in the DLP job result. The job will be cancelled if possible.

    :param dlp_job_id: The ID of the DLP job resource to be deleted.
    :type dlp_job_id: str
    :param project_id: (Optional) Google Cloud Platform project ID where the
        DLP Instance exists. If set to None or missing, the default
        project_id from the GCP connection is used.
    :type project_id: str
    :param retry: (Optional) A retry object used to retry requests.
        If None is specified, requests will not be retried.
    :type retry: google.api_core.retry.Retry
    :param timeout: (Optional) The amount of time, in seconds, to wait for the
        request to complete. Note that if retry is specified, the timeout
        applies to each individual attempt.
    :type timeout: float
    :param metadata: (Optional) Additional metadata that is provided to the method.
    :type metadata: sequence[tuple[str, str]]
    :raises AirflowException: if ``dlp_job_id`` is empty.
    """
    client = self.get_conn()

    if not dlp_job_id:
        # The message names the delete operation, not cancellation.
        raise AirflowException(
            "Please provide the ID of the DLP job resource to be deleted."
        )

    name = DlpServiceClient.dlp_job_path(project_id, dlp_job_id)
    client.delete_dlp_job(
        name=name, retry=retry, timeout=timeout, metadata=metadata
    )
@GoogleCloudBaseHook.catch_http_exception
def delete_inspect_template(
    self,
    template_id,
    organization_id=None,
    project_id=None,
    retry=None,
    timeout=None,
    metadata=None
):
    """
    Delete an inspect template.

    The template is looked up under the organization when
    ``organization_id`` is given; otherwise under the project
    (``project_id``, or the hook's own ``project_id`` when the argument is
    empty).

    :param template_id: The ID of the inspect template to be deleted.
    :type template_id: str
    :param organization_id: (Optional) The organization ID. Required to set this
        field if parent resource is an organization.
    :type organization_id: str
    :param project_id: (Optional) Google Cloud Platform project ID where the
        DLP Instance exists. Used only when no organization_id is given.
    :type project_id: str
    :param retry: (Optional) A retry object used to retry requests.
        If None is specified, requests will not be retried.
    :type retry: google.api_core.retry.Retry
    :param timeout: (Optional) The amount of time, in seconds, to wait for the
        request to complete. Note that if retry is specified, the timeout
        applies to each individual attempt.
    :type timeout: float
    :param metadata: (Optional) Additional metadata that is provided to the method.
    :type metadata: sequence[tuple[str, str]]
    :raises AirflowException: if ``template_id`` is empty, or if neither an
        organization nor a project is available.
    """
    dlp_client = self.get_conn()

    if not template_id:
        raise AirflowException(
            "Please provide the ID of the inspect template to be deleted."
        )

    # Fall back to the project configured on the connection.
    project_id = project_id or self.project_id

    if not (organization_id or project_id):
        raise AirflowException(
            "Please provide either organization_id or project_id."
        )

    # The organization wins when both identifiers are available.
    template_path = (
        DlpServiceClient.organization_inspect_template_path(organization_id, template_id)
        if organization_id
        else DlpServiceClient.project_inspect_template_path(project_id, template_id)
    )

    dlp_client.delete_inspect_template(
        name=template_path, retry=retry, timeout=timeout, metadata=metadata
    )
@GoogleCloudBaseHook.catch_http_exception
@GoogleCloudBaseHook.fallback_to_default_project_id
def delete_job_trigger(
    self,
    job_trigger_id,
    project_id=None,
    retry=None,
    timeout=None,
    metadata=None
):
    """
    Delete a job trigger.

    :param job_trigger_id: The ID of the DLP job trigger to be deleted.
    :type job_trigger_id: str
    :param project_id: (Optional) Google Cloud Platform project ID where the
        DLP Instance exists. If set to None or missing, the default
        project_id from the GCP connection is used.
    :type project_id: str
    :param retry: (Optional) A retry object used to retry requests.
        If None is specified, requests will not be retried.
    :type retry: google.api_core.retry.Retry
    :param timeout: (Optional) The amount of time, in seconds, to wait for the
        request to complete. Note that if retry is specified, the timeout
        applies to each individual attempt.
    :type timeout: float
    :param metadata: (Optional) Additional metadata that is provided to the method.
    :type metadata: sequence[tuple[str, str]]
    :raises AirflowException: if ``job_trigger_id`` is empty.
    """
    dlp_client = self.get_conn()

    if job_trigger_id:
        trigger_path = DlpServiceClient.project_job_trigger_path(
            project_id, job_trigger_id
        )
        dlp_client.delete_job_trigger(
            name=trigger_path, retry=retry, timeout=timeout, metadata=metadata
        )
        return

    raise AirflowException(
        "Please provide the ID of the DLP job trigger to be deleted."
    )
@GoogleCloudBaseHook.catch_http_exception
def delete_stored_info_type(
    self,
    stored_info_type_id,
    organization_id=None,
    project_id=None,
    retry=None,
    timeout=None,
    metadata=None
):
    """
    Delete a stored info type.

    The stored info type is looked up under the organization when
    ``organization_id`` is given; otherwise under the project
    (``project_id``, or the hook's own ``project_id`` when the argument is
    empty).

    :param stored_info_type_id: The ID of the stored info type to be deleted.
    :type stored_info_type_id: str
    :param organization_id: (Optional) The organization ID. Required to set this
        field if parent resource is an organization.
    :type organization_id: str
    :param project_id: (Optional) Google Cloud Platform project ID where the
        DLP Instance exists. Used only when no organization_id is given.
    :type project_id: str
    :param retry: (Optional) A retry object used to retry requests.
        If None is specified, requests will not be retried.
    :type retry: google.api_core.retry.Retry
    :param timeout: (Optional) The amount of time, in seconds, to wait for the
        request to complete. Note that if retry is specified, the timeout
        applies to each individual attempt.
    :type timeout: float
    :param metadata: (Optional) Additional metadata that is provided to the method.
    :type metadata: sequence[tuple[str, str]]
    :raises AirflowException: if ``stored_info_type_id`` is empty, or if
        neither an organization nor a project is available.
    """
    dlp_client = self.get_conn()

    if not stored_info_type_id:
        raise AirflowException(
            "Please provide the ID of the stored info type to be deleted."
        )

    # Fall back to the project configured on the connection.
    project_id = project_id or self.project_id

    if not organization_id and not project_id:
        raise AirflowException(
            "Please provide either organization_id or project_id."
        )

    # The organization wins when both identifiers are available.
    if organization_id:
        info_type_path = DlpServiceClient.organization_stored_info_type_path(
            organization_id, stored_info_type_id
        )
    else:
        info_type_path = DlpServiceClient.project_stored_info_type_path(
            project_id, stored_info_type_id
        )

    dlp_client.delete_stored_info_type(
        name=info_type_path, retry=retry, timeout=timeout, metadata=metadata
    )
@GoogleCloudBaseHook.catch_http_exception
def get_deidentify_template(
    self,
    template_id,
    organization_id=None,
    project_id=None,
    retry=None,
    timeout=None,
    metadata=None
):
    """
    Fetch a deidentify template.

    The template is looked up under the organization when
    ``organization_id`` is given; otherwise under the project
    (``project_id``, or the hook's own ``project_id`` when the argument is
    empty).

    :param template_id: The ID of deidentify template to be read.
    :type template_id: str
    :param organization_id: (Optional) The organization ID. Required to set this
        field if parent resource is an organization.
    :type organization_id: str
    :param project_id: (Optional) Google Cloud Platform project ID where the
        DLP Instance exists. Used only when no organization_id is given.
    :type project_id: str
    :param retry: (Optional) A retry object used to retry requests.
        If None is specified, requests will not be retried.
    :type retry: google.api_core.retry.Retry
    :param timeout: (Optional) The amount of time, in seconds, to wait for the
        request to complete. Note that if retry is specified, the timeout
        applies to each individual attempt.
    :type timeout: float
    :param metadata: (Optional) Additional metadata that is provided to the method.
    :type metadata: sequence[tuple[str, str]]
    :rtype: google.cloud.dlp_v2.types.DeidentifyTemplate
    :raises AirflowException: if ``template_id`` is empty, or if neither an
        organization nor a project is available.
    """
    dlp_client = self.get_conn()

    if not template_id:
        raise AirflowException(
            "Please provide the ID of the deidentify template to be read."
        )

    # Fall back to the project configured on the connection.
    project_id = project_id or self.project_id

    if not (organization_id or project_id):
        raise AirflowException(
            "Please provide either organization_id or project_id."
        )

    # The organization wins when both identifiers are available.
    template_path = (
        DlpServiceClient.organization_deidentify_template_path(organization_id, template_id)
        if organization_id
        else DlpServiceClient.project_deidentify_template_path(project_id, template_id)
    )

    return dlp_client.get_deidentify_template(
        name=template_path, retry=retry, timeout=timeout, metadata=metadata
    )
@GoogleCloudBaseHook.catch_http_exception
@GoogleCloudBaseHook.fallback_to_default_project_id
def get_dlp_job(
    self,
    dlp_job_id,
    project_id=None,
    retry=None,
    timeout=None,
    metadata=None
):
    """
    Fetch the latest state of a long-running DLP job.

    :param dlp_job_id: The ID of the DLP job resource to be read.
    :type dlp_job_id: str
    :param project_id: (Optional) Google Cloud Platform project ID where the
        DLP Instance exists. If set to None or missing, the default
        project_id from the GCP connection is used.
    :type project_id: str
    :param retry: (Optional) A retry object used to retry requests.
        If None is specified, requests will not be retried.
    :type retry: google.api_core.retry.Retry
    :param timeout: (Optional) The amount of time, in seconds, to wait for the
        request to complete. Note that if retry is specified, the timeout
        applies to each individual attempt.
    :type timeout: float
    :param metadata: (Optional) Additional metadata that is provided to the method.
    :type metadata: sequence[tuple[str, str]]
    :rtype: google.cloud.dlp_v2.types.DlpJob
    :raises AirflowException: if ``dlp_job_id`` is empty.
    """
    dlp_client = self.get_conn()

    if dlp_job_id:
        job_path = DlpServiceClient.dlp_job_path(project_id, dlp_job_id)
        return dlp_client.get_dlp_job(
            name=job_path, retry=retry, timeout=timeout, metadata=metadata
        )

    raise AirflowException(
        "Please provide the ID of the DLP job resource to be read."
    )
@GoogleCloudBaseHook.catch_http_exception
def get_inspect_template(
    self,
    template_id,
    organization_id=None,
    project_id=None,
    retry=None,
    timeout=None,
    metadata=None
):
    """
    Fetch an inspect template.

    The template is looked up under the organization when
    ``organization_id`` is given; otherwise under the project
    (``project_id``, or the hook's own ``project_id`` when the argument is
    empty).

    :param template_id: The ID of inspect template to be read.
    :type template_id: str
    :param organization_id: (Optional) The organization ID. Required to set this
        field if parent resource is an organization.
    :type organization_id: str
    :param project_id: (Optional) Google Cloud Platform project ID where the
        DLP Instance exists. Used only when no organization_id is given.
    :type project_id: str
    :param retry: (Optional) A retry object used to retry requests.
        If None is specified, requests will not be retried.
    :type retry: google.api_core.retry.Retry
    :param timeout: (Optional) The amount of time, in seconds, to wait for the
        request to complete. Note that if retry is specified, the timeout
        applies to each individual attempt.
    :type timeout: float
    :param metadata: (Optional) Additional metadata that is provided to the method.
    :type metadata: sequence[tuple[str, str]]
    :rtype: google.cloud.dlp_v2.types.InspectTemplate
    :raises AirflowException: if ``template_id`` is empty, or if neither an
        organization nor a project is available.
    """
    dlp_client = self.get_conn()

    if not template_id:
        raise AirflowException(
            "Please provide the ID of the inspect template to be read."
        )

    # Fall back to the project configured on the connection.
    project_id = project_id or self.project_id

    if not organization_id and not project_id:
        raise AirflowException(
            "Please provide either organization_id or project_id."
        )

    # The organization wins when both identifiers are available.
    if organization_id:
        template_path = DlpServiceClient.organization_inspect_template_path(
            organization_id, template_id
        )
    else:
        template_path = DlpServiceClient.project_inspect_template_path(
            project_id, template_id
        )

    return dlp_client.get_inspect_template(
        name=template_path, retry=retry, timeout=timeout, metadata=metadata
    )
@GoogleCloudBaseHook.catch_http_exception
@GoogleCloudBaseHook.fallback_to_default_project_id
def get_job_trigger(
    self,
    job_trigger_id,
    project_id=None,
    retry=None,
    timeout=None,
    metadata=None
):
    """
    Fetch a DLP job trigger.

    :param job_trigger_id: The ID of the DLP job trigger to be read.
    :type job_trigger_id: str
    :param project_id: (Optional) Google Cloud Platform project ID where the
        DLP Instance exists. If set to None or missing, the default
        project_id from the GCP connection is used.
    :type project_id: str
    :param retry: (Optional) A retry object used to retry requests.
        If None is specified, requests will not be retried.
    :type retry: google.api_core.retry.Retry
    :param timeout: (Optional) The amount of time, in seconds, to wait for the
        request to complete. Note that if retry is specified, the timeout
        applies to each individual attempt.
    :type timeout: float
    :param metadata: (Optional) Additional metadata that is provided to the method.
    :type metadata: sequence[tuple[str, str]]
    :rtype: google.cloud.dlp_v2.types.JobTrigger
    :raises AirflowException: if ``job_trigger_id`` is empty.
    """
    dlp_client = self.get_conn()

    if job_trigger_id:
        trigger_path = DlpServiceClient.project_job_trigger_path(
            project_id, job_trigger_id
        )
        return dlp_client.get_job_trigger(
            name=trigger_path, retry=retry, timeout=timeout, metadata=metadata
        )

    raise AirflowException(
        "Please provide the ID of the DLP job trigger to be read."
    )
@GoogleCloudBaseHook.catch_http_exception
def get_stored_info_type(
    self,
    stored_info_type_id,
    organization_id=None,
    project_id=None,
    retry=None,
    timeout=None,
    metadata=None
):
    """
    Fetch a stored info type.

    The stored info type is looked up under the organization when
    ``organization_id`` is given; otherwise under the project
    (``project_id``, or the hook's own ``project_id`` when the argument is
    empty).

    :param stored_info_type_id: The ID of the stored info type to be read.
    :type stored_info_type_id: str
    :param organization_id: (Optional) The organization ID. Required to set this
        field if parent resource is an organization.
    :type organization_id: str
    :param project_id: (Optional) Google Cloud Platform project ID where the
        DLP Instance exists. Used only when no organization_id is given.
    :type project_id: str
    :param retry: (Optional) A retry object used to retry requests.
        If None is specified, requests will not be retried.
    :type retry: google.api_core.retry.Retry
    :param timeout: (Optional) The amount of time, in seconds, to wait for the
        request to complete. Note that if retry is specified, the timeout
        applies to each individual attempt.
    :type timeout: float
    :param metadata: (Optional) Additional metadata that is provided to the method.
    :type metadata: sequence[tuple[str, str]]
    :rtype: google.cloud.dlp_v2.types.StoredInfoType
    :raises AirflowException: if ``stored_info_type_id`` is empty, or if
        neither an organization nor a project is available.
    """
    dlp_client = self.get_conn()

    if not stored_info_type_id:
        raise AirflowException(
            "Please provide the ID of the stored info type to be read."
        )

    # Fall back to the project configured on the connection.
    project_id = project_id or self.project_id

    if not (organization_id or project_id):
        raise AirflowException(
            "Please provide either organization_id or project_id."
        )

    # The organization wins when both identifiers are available.
    info_type_path = (
        DlpServiceClient.organization_stored_info_type_path(
            organization_id, stored_info_type_id
        )
        if organization_id
        else DlpServiceClient.project_stored_info_type_path(
            project_id, stored_info_type_id
        )
    )

    return dlp_client.get_stored_info_type(
        name=info_type_path, retry=retry, timeout=timeout, metadata=metadata
    )
@GoogleCloudBaseHook.catch_http_exception
@GoogleCloudBaseHook.fallback_to_default_project_id
def inspect_content(
    self,
    project_id=None,
    inspect_config=None,
    item=None,
    inspect_template_name=None,
    retry=None,
    timeout=None,
    metadata=None,
):
    """
    Scans a content item for potentially sensitive info. This method has limits on
    input size, processing time, and output size.

    :param project_id: (Optional) Google Cloud Platform project ID where the
        DLP Instance exists. If set to None or missing, the default
        project_id from the GCP connection is used.
    :type project_id: str
    :param inspect_config: (Optional) Configuration for the inspector. Items specified
        here will override the template referenced by the inspect_template_name argument.
    :type inspect_config: dict or google.cloud.dlp_v2.types.InspectConfig
    :param item: (Optional) The item to inspect. Will be treated as text.
    :type item: dict or google.cloud.dlp_v2.types.ContentItem
    :param inspect_template_name: (Optional) Optional template to use. Any configuration
        directly specified in inspect_config will override those set in the template.
    :type inspect_template_name: str
    :param retry: (Optional) A retry object used to retry requests.
        If None is specified, requests will not be retried.
    :type retry: google.api_core.retry.Retry
    :param timeout: (Optional) The amount of time, in seconds, to wait for the request
        to complete. Note that if retry is specified, the timeout applies to each
        individual attempt.
    :type timeout: float
    :param metadata: (Optional) Additional metadata that is provided to the method.
    :type metadata: sequence[tuple[str, str]]
    :rtype: google.cloud.dlp_v2.types.InspectContentResponse
    """
    dlp_client = self.get_conn()
    project_path = DlpServiceClient.project_path(project_id)

    response = dlp_client.inspect_content(
        parent=project_path,
        inspect_config=inspect_config,
        item=item,
        inspect_template_name=inspect_template_name,
        retry=retry,
        timeout=timeout,
        metadata=metadata,
    )
    return response
@GoogleCloudBaseHook.catch_http_exception
def list_deidentify_templates(
    self,
    organization_id=None,
    project_id=None,
    page_size=None,
    order_by=None,
    retry=None,
    timeout=None,
    metadata=None,
):
    """
    Returns the deidentify templates found under an organization or a project.

    The organization takes precedence: ``project_id`` (or the project taken from
    the connection) is only used when ``organization_id`` is not given.

    :param organization_id: (Optional) The organization ID. Required to set this
        field if parent resource is an organization.
    :type organization_id: str
    :param project_id: (Optional) Google Cloud Platform project ID where the
        DLP Instance exists. Only set this field if the parent resource is
        a project instead of an organization.
    :type project_id: str
    :param page_size: (Optional) The maximum number of resources contained in the
        underlying API response.
    :type page_size: int
    :param order_by: (Optional) Optional comma separated list of fields to order by,
        followed by asc or desc postfix.
    :type order_by: str
    :param retry: (Optional) A retry object used to retry requests.
        If None is specified, requests will not be retried.
    :type retry: google.api_core.retry.Retry
    :param timeout: (Optional) The amount of time, in seconds, to wait for the request
        to complete. Note that if retry is specified, the timeout applies to each
        individual attempt.
    :type timeout: float
    :param metadata: (Optional) Additional metadata that is provided to the method.
    :type metadata: sequence[tuple[str, str]]
    :rtype: list[google.cloud.dlp_v2.types.DeidentifyTemplate]
    :raises AirflowException: If neither an organization nor a project can be determined.
    """
    dlp_client = self.get_conn()

    # Fall back to the project configured on the connection.
    effective_project = project_id if project_id else self.project_id

    if not (organization_id or effective_project):
        raise AirflowException(
            "Please provide either organization_id or project_id."
        )

    if organization_id:
        parent_path = DlpServiceClient.organization_path(organization_id)
    else:
        parent_path = DlpServiceClient.project_path(effective_project)

    # Materialise the response iterable so callers receive a plain list.
    return list(
        dlp_client.list_deidentify_templates(
            parent=parent_path,
            page_size=page_size,
            order_by=order_by,
            retry=retry,
            timeout=timeout,
            metadata=metadata,
        )
    )
@GoogleCloudBaseHook.catch_http_exception
@GoogleCloudBaseHook.fallback_to_default_project_id
def list_dlp_jobs(
    self,
    project_id=None,
    results_filter=None,
    page_size=None,
    job_type=None,
    order_by=None,
    retry=None,
    timeout=None,
    metadata=None,
):
    """
    Returns the DLP jobs of a project that match the filter given in the request.

    :param project_id: (Optional) Google Cloud Platform project ID where the
        DLP Instance exists. If set to None or missing, the default
        project_id from the GCP connection is used.
    :type project_id: str
    :param results_filter: (Optional) Filter used to specify a subset of results.
    :type results_filter: str
    :param page_size: (Optional) The maximum number of resources contained in the
        underlying API response.
    :type page_size: int
    :param job_type: (Optional) The type of job.
    :type job_type: str
    :param order_by: (Optional) Optional comma separated list of fields to order by,
        followed by asc or desc postfix.
    :type order_by: str
    :param retry: (Optional) A retry object used to retry requests.
        If None is specified, requests will not be retried.
    :type retry: google.api_core.retry.Retry
    :param timeout: (Optional) The amount of time, in seconds, to wait for the request
        to complete. Note that if retry is specified, the timeout applies to each
        individual attempt.
    :type timeout: float
    :param metadata: (Optional) Additional metadata that is provided to the method.
    :type metadata: sequence[tuple[str, str]]
    :rtype: list[google.cloud.dlp_v2.types.DlpJob]
    """
    dlp_client = self.get_conn()
    project_path = DlpServiceClient.project_path(project_id)

    # Materialise the response iterable so callers receive a plain list.
    return list(
        dlp_client.list_dlp_jobs(
            parent=project_path,
            filter_=results_filter,
            page_size=page_size,
            type_=job_type,
            order_by=order_by,
            retry=retry,
            timeout=timeout,
            metadata=metadata,
        )
    )
@GoogleCloudBaseHook.catch_http_exception
def list_info_types(
    self,
    language_code=None,
    results_filter=None,
    retry=None,
    timeout=None,
    metadata=None,
):
    """
    Retrieves the sensitive information types that the DLP API supports.

    :param language_code: (Optional) Optional BCP-47 language code for localized info
        type friendly names. If omitted, or if localized strings are not available,
        en-US strings will be returned.
    :type language_code: str
    :param results_filter: (Optional) Filter used to specify a subset of results.
    :type results_filter: str
    :param retry: (Optional) A retry object used to retry requests.
        If None is specified, requests will not be retried.
    :type retry: google.api_core.retry.Retry
    :param timeout: (Optional) The amount of time, in seconds, to wait for the request
        to complete. Note that if retry is specified, the timeout applies to each
        individual attempt.
    :type timeout: float
    :param metadata: (Optional) Additional metadata that is provided to the method.
    :type metadata: sequence[tuple[str, str]]
    :rtype: google.cloud.dlp_v2.types.ListInfoTypesResponse
    """
    dlp_client = self.get_conn()

    # Unlike the other list_* methods, the response object is returned as-is.
    response = dlp_client.list_info_types(
        language_code=language_code,
        filter_=results_filter,
        retry=retry,
        timeout=timeout,
        metadata=metadata,
    )
    return response
@GoogleCloudBaseHook.catch_http_exception
def list_inspect_templates(
    self,
    organization_id=None,
    project_id=None,
    page_size=None,
    order_by=None,
    retry=None,
    timeout=None,
    metadata=None,
):
    """
    Returns the inspect templates found under an organization or a project.

    The organization takes precedence: ``project_id`` (or the project taken from
    the connection) is only used when ``organization_id`` is not given.

    :param organization_id: (Optional) The organization ID. Required to set this
        field if parent resource is an organization.
    :type organization_id: str
    :param project_id: (Optional) Google Cloud Platform project ID where the
        DLP Instance exists. Only set this field if the parent resource is
        a project instead of an organization.
    :type project_id: str
    :param page_size: (Optional) The maximum number of resources contained in the
        underlying API response.
    :type page_size: int
    :param order_by: (Optional) Optional comma separated list of fields to order by,
        followed by asc or desc postfix.
    :type order_by: str
    :param retry: (Optional) A retry object used to retry requests.
        If None is specified, requests will not be retried.
    :type retry: google.api_core.retry.Retry
    :param timeout: (Optional) The amount of time, in seconds, to wait for the request
        to complete. Note that if retry is specified, the timeout applies to each
        individual attempt.
    :type timeout: float
    :param metadata: (Optional) Additional metadata that is provided to the method.
    :type metadata: sequence[tuple[str, str]]
    :rtype: list[google.cloud.dlp_v2.types.InspectTemplate]
    :raises AirflowException: If neither an organization nor a project can be determined.
    """
    dlp_client = self.get_conn()

    # Fall back to the project configured on the connection.
    effective_project = project_id if project_id else self.project_id

    if not (organization_id or effective_project):
        raise AirflowException(
            "Please provide either organization_id or project_id."
        )

    if organization_id:
        parent_path = DlpServiceClient.organization_path(organization_id)
    else:
        parent_path = DlpServiceClient.project_path(effective_project)

    # Materialise the response iterable so callers receive a plain list.
    return list(
        dlp_client.list_inspect_templates(
            parent=parent_path,
            page_size=page_size,
            order_by=order_by,
            retry=retry,
            timeout=timeout,
            metadata=metadata,
        )
    )
@GoogleCloudBaseHook.catch_http_exception
@GoogleCloudBaseHook.fallback_to_default_project_id
def list_job_triggers(
    self,
    project_id=None,
    page_size=None,
    order_by=None,
    results_filter=None,
    retry=None,
    timeout=None,
    metadata=None,
):
    """
    Returns the job triggers defined in a project.

    :param project_id: (Optional) Google Cloud Platform project ID where the
        DLP Instance exists. If set to None or missing, the default
        project_id from the GCP connection is used.
    :type project_id: str
    :param page_size: (Optional) The maximum number of resources contained in the
        underlying API response.
    :type page_size: int
    :param order_by: (Optional) Optional comma separated list of fields to order by,
        followed by asc or desc postfix.
    :type order_by: str
    :param results_filter: (Optional) Filter used to specify a subset of results.
    :type results_filter: str
    :param retry: (Optional) A retry object used to retry requests.
        If None is specified, requests will not be retried.
    :type retry: google.api_core.retry.Retry
    :param timeout: (Optional) The amount of time, in seconds, to wait for the request
        to complete. Note that if retry is specified, the timeout applies to each
        individual attempt.
    :type timeout: float
    :param metadata: (Optional) Additional metadata that is provided to the method.
    :type metadata: sequence[tuple[str, str]]
    :rtype: list[google.cloud.dlp_v2.types.JobTrigger]
    """
    dlp_client = self.get_conn()
    project_path = DlpServiceClient.project_path(project_id)

    # Materialise the response iterable so callers receive a plain list.
    return list(
        dlp_client.list_job_triggers(
            parent=project_path,
            page_size=page_size,
            order_by=order_by,
            filter_=results_filter,
            retry=retry,
            timeout=timeout,
            metadata=metadata,
        )
    )
@GoogleCloudBaseHook.catch_http_exception
def list_stored_info_types(
    self,
    organization_id=None,
    project_id=None,
    page_size=None,
    order_by=None,
    retry=None,
    timeout=None,
    metadata=None,
):
    """
    Returns the stored info types found under an organization or a project.

    The organization takes precedence: ``project_id`` (or the project taken from
    the connection) is only used when ``organization_id`` is not given.

    :param organization_id: (Optional) The organization ID. Required to set this
        field if parent resource is an organization.
    :type organization_id: str
    :param project_id: (Optional) Google Cloud Platform project ID where the
        DLP Instance exists. Only set this field if the parent resource is
        a project instead of an organization.
    :type project_id: str
    :param page_size: (Optional) The maximum number of resources contained in the
        underlying API response.
    :type page_size: int
    :param order_by: (Optional) Optional comma separated list of fields to order by,
        followed by asc or desc postfix.
    :type order_by: str
    :param retry: (Optional) A retry object used to retry requests.
        If None is specified, requests will not be retried.
    :type retry: google.api_core.retry.Retry
    :param timeout: (Optional) The amount of time, in seconds, to wait for the request
        to complete. Note that if retry is specified, the timeout applies to each
        individual attempt.
    :type timeout: float
    :param metadata: (Optional) Additional metadata that is provided to the method.
    :type metadata: sequence[tuple[str, str]]
    :rtype: list[google.cloud.dlp_v2.types.StoredInfoType]
    :raises AirflowException: If neither an organization nor a project can be determined.
    """
    dlp_client = self.get_conn()

    # Fall back to the project configured on the connection.
    effective_project = project_id if project_id else self.project_id

    if not (organization_id or effective_project):
        raise AirflowException(
            "Please provide either organization_id or project_id."
        )

    if organization_id:
        parent_path = DlpServiceClient.organization_path(organization_id)
    else:
        parent_path = DlpServiceClient.project_path(effective_project)

    # Materialise the response iterable so callers receive a plain list.
    return list(
        dlp_client.list_stored_info_types(
            parent=parent_path,
            page_size=page_size,
            order_by=order_by,
            retry=retry,
            timeout=timeout,
            metadata=metadata,
        )
    )
@GoogleCloudBaseHook.catch_http_exception
@GoogleCloudBaseHook.fallback_to_default_project_id
def redact_image(
    self,
    project_id=None,
    inspect_config=None,
    image_redaction_configs=None,
    include_findings=None,
    byte_item=None,
    retry=None,
    timeout=None,
    metadata=None,
):
    """
    Removes potentially sensitive info from an image. This method has limits on
    input size, processing time, and output size.

    :param project_id: (Optional) Google Cloud Platform project ID where the
        DLP Instance exists. If set to None or missing, the default
        project_id from the GCP connection is used.
    :type project_id: str
    :param inspect_config: (Optional) Configuration for the inspector.
    :type inspect_config: dict or google.cloud.dlp_v2.types.InspectConfig
    :param image_redaction_configs: (Optional) The configuration for specifying what
        content to redact from images.
    :type image_redaction_configs: list[dict] or list[google.cloud.dlp_v2.types.ImageRedactionConfig]
    :param include_findings: (Optional) Whether the response should include findings
        along with the redacted image.
    :type include_findings: bool
    :param byte_item: (Optional) The content must be PNG, JPEG, SVG or BMP.
    :type byte_item: dict or google.cloud.dlp_v2.types.ByteContentItem
    :param retry: (Optional) A retry object used to retry requests.
        If None is specified, requests will not be retried.
    :type retry: google.api_core.retry.Retry
    :param timeout: (Optional) The amount of time, in seconds, to wait for the request
        to complete. Note that if retry is specified, the timeout applies to each
        individual attempt.
    :type timeout: float
    :param metadata: (Optional) Additional metadata that is provided to the method.
    :type metadata: sequence[tuple[str, str]]
    :rtype: google.cloud.dlp_v2.types.RedactImageResponse
    """
    dlp_client = self.get_conn()
    project_path = DlpServiceClient.project_path(project_id)

    response = dlp_client.redact_image(
        parent=project_path,
        inspect_config=inspect_config,
        image_redaction_configs=image_redaction_configs,
        include_findings=include_findings,
        byte_item=byte_item,
        retry=retry,
        timeout=timeout,
        metadata=metadata,
    )
    return response
@GoogleCloudBaseHook.catch_http_exception
@GoogleCloudBaseHook.fallback_to_default_project_id
def reidentify_content(
    self,
    project_id=None,
    reidentify_config=None,
    inspect_config=None,
    item=None,
    inspect_template_name=None,
    reidentify_template_name=None,
    retry=None,
    timeout=None,
    metadata=None,
):
    """
    Reverses de-identification on a content item.

    :param project_id: (Optional) Google Cloud Platform project ID where the
        DLP Instance exists. If set to None or missing, the default
        project_id from the GCP connection is used.
    :type project_id: str
    :param reidentify_config: (Optional) Configuration for the re-identification of
        the content item.
    :type reidentify_config: dict or google.cloud.dlp_v2.types.DeidentifyConfig
    :param inspect_config: (Optional) Configuration for the inspector.
    :type inspect_config: dict or google.cloud.dlp_v2.types.InspectConfig
    :param item: (Optional) The item to re-identify. Will be treated as text.
    :type item: dict or google.cloud.dlp_v2.types.ContentItem
    :param inspect_template_name: (Optional) Optional template to use. Any configuration
        directly specified in inspect_config will override those set in the template.
    :type inspect_template_name: str
    :param reidentify_template_name: (Optional) Optional template to use. References an
        instance of deidentify template. Any configuration directly specified in
        reidentify_config or inspect_config will override those set in the template.
    :type reidentify_template_name: str
    :param retry: (Optional) A retry object used to retry requests.
        If None is specified, requests will not be retried.
    :type retry: google.api_core.retry.Retry
    :param timeout: (Optional) The amount of time, in seconds, to wait for the request
        to complete. Note that if retry is specified, the timeout applies to each
        individual attempt.
    :type timeout: float
    :param metadata: (Optional) Additional metadata that is provided to the method.
    :type metadata: sequence[tuple[str, str]]
    :rtype: google.cloud.dlp_v2.types.ReidentifyContentResponse
    """
    dlp_client = self.get_conn()
    project_path = DlpServiceClient.project_path(project_id)

    response = dlp_client.reidentify_content(
        parent=project_path,
        reidentify_config=reidentify_config,
        inspect_config=inspect_config,
        item=item,
        inspect_template_name=inspect_template_name,
        reidentify_template_name=reidentify_template_name,
        retry=retry,
        timeout=timeout,
        metadata=metadata,
    )
    return response
@GoogleCloudBaseHook.catch_http_exception
def update_deidentify_template(
    self,
    template_id,
    organization_id=None,
    project_id=None,
    deidentify_template=None,
    update_mask=None,
    retry=None,
    timeout=None,
    metadata=None,
):
    """
    Applies an update to a deidentify template addressed under an organization
    or a project.

    The organization takes precedence: ``project_id`` (or the project taken from
    the connection) is only used when ``organization_id`` is not given.

    :param template_id: The ID of deidentify template to be updated.
    :type template_id: str
    :param organization_id: (Optional) The organization ID. Required to set this
        field if parent resource is an organization.
    :type organization_id: str
    :param project_id: (Optional) Google Cloud Platform project ID where the
        DLP Instance exists. Only set this field if the parent resource is
        a project instead of an organization.
    :type project_id: str
    :param deidentify_template: New deidentify template value.
    :type deidentify_template: dict or google.cloud.dlp_v2.types.DeidentifyTemplate
    :param update_mask: Mask to control which fields get updated.
    :type update_mask: dict or google.cloud.dlp_v2.types.FieldMask
    :param retry: (Optional) A retry object used to retry requests.
        If None is specified, requests will not be retried.
    :type retry: google.api_core.retry.Retry
    :param timeout: (Optional) The amount of time, in seconds, to wait for the request
        to complete. Note that if retry is specified, the timeout applies to each
        individual attempt.
    :type timeout: float
    :param metadata: (Optional) Additional metadata that is provided to the method.
    :type metadata: sequence[tuple[str, str]]
    :rtype: google.cloud.dlp_v2.types.DeidentifyTemplate
    :raises AirflowException: If ``template_id`` is empty, or if neither an
        organization nor a project can be determined.
    """
    dlp_client = self.get_conn()

    if not template_id:
        raise AirflowException(
            "Please provide the ID of deidentify template to be updated."
        )

    # Fall back to the project configured on the connection.
    effective_project = project_id if project_id else self.project_id

    if not (organization_id or effective_project):
        raise AirflowException(
            "Please provide either organization_id or project_id."
        )

    if organization_id:
        resource_name = DlpServiceClient.organization_deidentify_template_path(
            organization_id, template_id
        )
    else:
        resource_name = DlpServiceClient.project_deidentify_template_path(
            effective_project, template_id
        )

    return dlp_client.update_deidentify_template(
        name=resource_name,
        deidentify_template=deidentify_template,
        update_mask=update_mask,
        retry=retry,
        timeout=timeout,
        metadata=metadata,
    )
@GoogleCloudBaseHook.catch_http_exception
def update_inspect_template(
    self,
    template_id,
    organization_id=None,
    project_id=None,
    inspect_template=None,
    update_mask=None,
    retry=None,
    timeout=None,
    metadata=None,
):
    """
    Applies an update to an inspect template addressed under an organization
    or a project.

    The organization takes precedence: ``project_id`` (or the project taken from
    the connection) is only used when ``organization_id`` is not given.

    :param template_id: The ID of the inspect template to be updated.
    :type template_id: str
    :param organization_id: (Optional) The organization ID. Required to set this
        field if parent resource is an organization.
    :type organization_id: str
    :param project_id: (Optional) Google Cloud Platform project ID where the
        DLP Instance exists. Only set this field if the parent resource is
        a project instead of an organization.
    :type project_id: str
    :param inspect_template: New inspect template value.
    :type inspect_template: dict or google.cloud.dlp_v2.types.InspectTemplate
    :param update_mask: Mask to control which fields get updated.
    :type update_mask: dict or google.cloud.dlp_v2.types.FieldMask
    :param retry: (Optional) A retry object used to retry requests.
        If None is specified, requests will not be retried.
    :type retry: google.api_core.retry.Retry
    :param timeout: (Optional) The amount of time, in seconds, to wait for the request
        to complete. Note that if retry is specified, the timeout applies to each
        individual attempt.
    :type timeout: float
    :param metadata: (Optional) Additional metadata that is provided to the method.
    :type metadata: sequence[tuple[str, str]]
    :rtype: google.cloud.dlp_v2.types.InspectTemplate
    :raises AirflowException: If ``template_id`` is empty, or if neither an
        organization nor a project can be determined.
    """
    dlp_client = self.get_conn()

    if not template_id:
        raise AirflowException(
            "Please provide the ID of the inspect template to be updated."
        )

    # Fall back to the project configured on the connection.
    effective_project = project_id if project_id else self.project_id

    if not (organization_id or effective_project):
        raise AirflowException(
            "Please provide either organization_id or project_id."
        )

    if organization_id:
        resource_name = DlpServiceClient.organization_inspect_template_path(
            organization_id, template_id
        )
    else:
        resource_name = DlpServiceClient.project_inspect_template_path(
            effective_project, template_id
        )

    return dlp_client.update_inspect_template(
        name=resource_name,
        inspect_template=inspect_template,
        update_mask=update_mask,
        retry=retry,
        timeout=timeout,
        metadata=metadata,
    )
@GoogleCloudBaseHook.catch_http_exception
@GoogleCloudBaseHook.fallback_to_default_project_id
def update_job_trigger(
    self,
    job_trigger_id,
    project_id=None,
    job_trigger=None,
    update_mask=None,
    retry=None,
    timeout=None,
    metadata=None,
):
    """
    Applies an update to a job trigger of a project.

    :param job_trigger_id: The ID of the DLP job trigger to be updated.
    :type job_trigger_id: str
    :param project_id: (Optional) Google Cloud Platform project ID where the
        DLP Instance exists. If set to None or missing, the default
        project_id from the GCP connection is used.
    :type project_id: str
    :param job_trigger: New job trigger value.
    :type job_trigger: dict or google.cloud.dlp_v2.types.JobTrigger
    :param update_mask: Mask to control which fields get updated.
    :type update_mask: dict or google.cloud.dlp_v2.types.FieldMask
    :param retry: (Optional) A retry object used to retry requests.
        If None is specified, requests will not be retried.
    :type retry: google.api_core.retry.Retry
    :param timeout: (Optional) The amount of time, in seconds, to wait for the request
        to complete. Note that if retry is specified, the timeout applies to each
        individual attempt.
    :type timeout: float
    :param metadata: (Optional) Additional metadata that is provided to the method.
    :type metadata: sequence[tuple[str, str]]
    :rtype: google.cloud.dlp_v2.types.JobTrigger
    :raises AirflowException: If ``job_trigger_id`` is empty.
    """
    dlp_client = self.get_conn()

    if not job_trigger_id:
        raise AirflowException(
            "Please provide the ID of the DLP job trigger to be updated."
        )

    trigger_path = DlpServiceClient.project_job_trigger_path(
        project_id, job_trigger_id
    )

    response = dlp_client.update_job_trigger(
        name=trigger_path,
        job_trigger=job_trigger,
        update_mask=update_mask,
        retry=retry,
        timeout=timeout,
        metadata=metadata,
    )
    return response
@GoogleCloudBaseHook.catch_http_exception
def update_stored_info_type(
    self,
    stored_info_type_id,
    organization_id=None,
    project_id=None,
    config=None,
    update_mask=None,
    retry=None,
    timeout=None,
    metadata=None,
):
    """
    Updates a stored info type (addressed under an organization or a project)
    by creating a new version.

    The organization takes precedence: ``project_id`` (or the project taken from
    the connection) is only used when ``organization_id`` is not given.

    :param stored_info_type_id: The ID of the stored info type to be updated.
    :type stored_info_type_id: str
    :param organization_id: (Optional) The organization ID. Required to set this
        field if parent resource is an organization.
    :type organization_id: str
    :param project_id: (Optional) Google Cloud Platform project ID where the
        DLP Instance exists. Only set this field if the parent resource is
        a project instead of an organization.
    :type project_id: str
    :param config: Updated configuration for the stored info type. If not provided, a new
        version of the stored info type will be created with the existing configuration.
    :type config: dict or google.cloud.dlp_v2.types.StoredInfoTypeConfig
    :param update_mask: Mask to control which fields get updated.
    :type update_mask: dict or google.cloud.dlp_v2.types.FieldMask
    :param retry: (Optional) A retry object used to retry requests.
        If None is specified, requests will not be retried.
    :type retry: google.api_core.retry.Retry
    :param timeout: (Optional) The amount of time, in seconds, to wait for the request
        to complete. Note that if retry is specified, the timeout applies to each
        individual attempt.
    :type timeout: float
    :param metadata: (Optional) Additional metadata that is provided to the method.
    :type metadata: sequence[tuple[str, str]]
    :rtype: google.cloud.dlp_v2.types.StoredInfoType
    :raises AirflowException: If ``stored_info_type_id`` is empty, or if neither an
        organization nor a project can be determined.
    """
    dlp_client = self.get_conn()

    if not stored_info_type_id:
        raise AirflowException(
            "Please provide the ID of the stored info type to be updated."
        )

    # Fall back to the project configured on the connection.
    effective_project = project_id if project_id else self.project_id

    if not (organization_id or effective_project):
        raise AirflowException(
            "Please provide either organization_id or project_id."
        )

    if organization_id:
        resource_name = DlpServiceClient.organization_stored_info_type_path(
            organization_id, stored_info_type_id
        )
    else:
        resource_name = DlpServiceClient.project_stored_info_type_path(
            effective_project, stored_info_type_id
        )

    return dlp_client.update_stored_info_type(
        name=resource_name,
        config=config,
        update_mask=update_mask,
        retry=retry,
        timeout=timeout,
        metadata=metadata,
    )