# Source code for airflow.contrib.hooks.gcp_dlp_hook

# -*- coding: utf-8 -*-
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

"""
This module contains a CloudDLPHook
which allows you to connect to GCP Cloud DLP service.
"""

import re
import time
from google.cloud.dlp_v2 import DlpServiceClient
from google.cloud.dlp_v2.types import DlpJob

from airflow import AirflowException
from airflow.contrib.hooks.gcp_api_base_hook import GoogleCloudBaseHook

# Matches a DLP job resource name of the form ``projects/<project>/dlpJobs/<id>``
# and captures the trailing job ID in the named group ``job``.
DLP_JOB_PATH_PATTERN = r'^projects/[^/]+/dlpJobs/(?P<job>.*?)$'

# Time to sleep between active checks of the operation results
TIME_TO_SLEEP_IN_SECONDS = 1

# pylint: disable=R0904, C0302
class CloudDLPHook(GoogleCloudBaseHook):
    """
    Hook for Google Cloud Data Loss Prevention (DLP) APIs.

    Cloud DLP allows clients to detect the presence of Personally Identifiable
    Information (PII) and other privacy-sensitive data in user-supplied,
    unstructured data streams, like text blocks or images. The service also
    includes methods for sensitive data redaction and scheduling of data scans
    on Google Cloud Platform based data sets.

    :param gcp_conn_id: The connection ID to use when fetching connection info.
    :type gcp_conn_id: str
    :param delegate_to: The account to impersonate, if any. For this to work,
        the service account making the request must have domain-wide
        delegation enabled.
    :type delegate_to: str
    """

    def __init__(self, gcp_conn_id="google_cloud_default", delegate_to=None):
        super(CloudDLPHook, self).__init__(gcp_conn_id, delegate_to)
        # No API client yet; the hook starts without one.
        self._client = None
def get_conn(self):
    """
    Provides a client for interacting with the Cloud DLP API.

    The client is built from ``self._get_credentials()`` the first time it is
    needed and kept on the hook, so later calls return the same object.

    :return: GCP Cloud DLP API Client
    :rtype: google.cloud.dlp_v2.DlpServiceClient
    """
    if self._client:
        return self._client

    self._client = DlpServiceClient(credentials=self._get_credentials())
    return self._client
@GoogleCloudBaseHook.catch_http_exception @GoogleCloudBaseHook.fallback_to_default_project_id
[docs] def cancel_dlp_job( self, dlp_job_id, project_id=None, retry=None, timeout=None, metadata=None ): """ Starts asynchronous cancellation on a long-running DLP job. :param dlp_job_id: ID of the DLP job resource to be cancelled. :type dlp_job_id: str :param project_id: (Optional) Google Cloud Platform project ID where the DLP Instance exists. If set to None or missing, the default project_id from the GCP connection is used. :type project_id: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. :type metadata: sequence[tuple[str, str]]] """ client = self.get_conn() if not dlp_job_id: raise AirflowException( "Please provide the ID of the DLP job resource to be cancelled." ) name = DlpServiceClient.dlp_job_path(project_id, dlp_job_id) client.cancel_dlp_job( name=name, retry=retry, timeout=timeout, metadata=metadata
) @GoogleCloudBaseHook.catch_http_exception
[docs] def create_deidentify_template( self, organization_id=None, project_id=None, deidentify_template=None, template_id=None, retry=None, timeout=None, metadata=None, ): """ Creates a deidentify template for re-using frequently used configuration for de-identifying content, images, and storage. :param organization_id: (Optional) The organization ID. Required to set this field if parent resource is an organzation. :type organization_id: str :param project_id: (Optional) Google Cloud Platform project ID where the DLP Instance exists. Only set this field if the parent resource is a project instead of an organzation. :type project_id: str :param deidentify_template: (Optional) The deidentify template to create. :type deidentify_template: dict or google.cloud.dlp_v2.types.DeidentifyTemplate :param template_id: (Optional) The template ID. :type template_id: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. :type metadata: sequence[tuple[str, str]]] :rtype: google.cloud.dlp_v2.types.DeidentifyTemplate """ client = self.get_conn() # Handle project_id from connection configuration project_id = project_id or self.project_id if organization_id: parent = DlpServiceClient.organization_path(organization_id) elif project_id: parent = DlpServiceClient.project_path(project_id) else: raise AirflowException( "Please provide either organization_id or project_id." ) return client.create_deidentify_template( parent=parent, deidentify_template=deidentify_template, template_id=template_id, retry=retry, timeout=timeout, metadata=metadata,
) @GoogleCloudBaseHook.catch_http_exception @GoogleCloudBaseHook.fallback_to_default_project_id
def create_dlp_job(
    self,
    project_id=None,
    inspect_job=None,
    risk_job=None,
    job_id=None,
    retry=None,
    timeout=None,
    metadata=None,
    wait_until_finished=True
):
    """
    Creates a new job to inspect storage or calculate risk metrics.

    When ``wait_until_finished`` is true, the job state is polled every
    ``TIME_TO_SLEEP_IN_SECONDS`` seconds until it reaches ``DONE``.

    :param project_id: (Optional) Google Cloud Platform project ID where the
        DLP Instance exists. If set to None or missing, the default
        project_id from the GCP connection is used.
    :type project_id: str
    :param inspect_job: (Optional) The configuration for the inspect job.
    :type inspect_job: dict or google.cloud.dlp_v2.types.InspectJobConfig
    :param risk_job: (Optional) The configuration for the risk job.
    :type risk_job: dict or google.cloud.dlp_v2.types.RiskAnalysisJobConfig
    :param job_id: (Optional) The job ID.
    :type job_id: str
    :param retry: (Optional) A retry object used to retry requests.
        If None is specified, requests will not be retried.
    :type retry: google.api_core.retry.Retry
    :param timeout: (Optional) The amount of time, in seconds, to wait for the
        request to complete. Note that if retry is specified, the timeout
        applies to each individual attempt.
    :type timeout: float
    :param metadata: (Optional) Additional metadata that is provided to the method.
    :type metadata: sequence[tuple[str, str]]
    :param wait_until_finished: (Optional) If true, it will keep polling the job state
        until it is set to DONE.
    :type wait_until_finished: bool
    :rtype: google.cloud.dlp_v2.types.DlpJob
    :raises AirflowException: if the job ID cannot be extracted from the created
        job's name, or if polling observes a state other than DONE, PENDING,
        RUNNING or JOB_STATE_UNSPECIFIED.
    """
    client = self.get_conn()

    parent = DlpServiceClient.project_path(project_id)
    job = client.create_dlp_job(
        parent=parent,
        inspect_job=inspect_job,
        risk_job=risk_job,
        job_id=job_id,
        retry=retry,
        timeout=timeout,
        metadata=metadata,
    )

    if not wait_until_finished:
        return job

    # get_dlp_job() expects the bare job ID, not the full resource name.
    match = re.match(DLP_JOB_PATH_PATTERN, job.name, re.IGNORECASE)
    if match is None:
        # Fail with a clear error instead of an AttributeError on None.
        raise AirflowException(
            "Unable to retrieve DLP job's ID from {}.".format(job.name)
        )
    job_name = match.group('job')

    # States in which the job may still reach DONE, so polling continues.
    unfinished_states = (
        DlpJob.JobState.PENDING,
        DlpJob.JobState.RUNNING,
        DlpJob.JobState.JOB_STATE_UNSPECIFIED,
    )

    while True:
        job = self.get_dlp_job(dlp_job_id=job_name, project_id=project_id)
        state_name = DlpJob.JobState.Name(job.state)
        self.log.info('DLP job %s state: %s.', job.name, state_name)

        if job.state == DlpJob.JobState.DONE:
            return job

        if job.state not in unfinished_states:
            raise AirflowException(
                'Stopped polling DLP job state. DLP job {} state: {}.'
                .format(job.name, state_name)
            )

        time.sleep(TIME_TO_SLEEP_IN_SECONDS)
@GoogleCloudBaseHook.catch_http_exception
[docs] def create_inspect_template( self, organization_id=None, project_id=None, inspect_template=None, template_id=None, retry=None, timeout=None, metadata=None, ): """ Creates an inspect template for re-using frequently used configuration for inspecting content, images, and storage. :param organization_id: (Optional) The organization ID. Required to set this field if parent resource is an organzation. :type organization_id: str :param project_id: (Optional) Google Cloud Platform project ID where the DLP Instance exists. Only set this field if the parent resource is a project instead of an organzation. :type project_id: str :param inspect_template: (Optional) The inspect template to create. :type inspect_template: dict or google.cloud.dlp_v2.types.InspectTemplate :param template_id: (Optional) The template ID. :type template_id: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. :type metadata: sequence[tuple[str, str]]] :rtype: google.cloud.dlp_v2.types.InspectTemplate """ client = self.get_conn() # Handle project_id from connection configuration project_id = project_id or self.project_id if organization_id: parent = DlpServiceClient.organization_path(organization_id) elif project_id: parent = DlpServiceClient.project_path(project_id) else: raise AirflowException( "Please provide either organization_id or project_id." ) return client.create_inspect_template( parent=parent, inspect_template=inspect_template, template_id=template_id, retry=retry, timeout=timeout, metadata=metadata,
) @GoogleCloudBaseHook.catch_http_exception @GoogleCloudBaseHook.fallback_to_default_project_id
[docs] def create_job_trigger( self, project_id=None, job_trigger=None, trigger_id=None, retry=None, timeout=None, metadata=None, ): """ Creates a job trigger to run DLP actions such as scanning storage for sensitive information on a set schedule. :param project_id: (Optional) Google Cloud Platform project ID where the DLP Instance exists. If set to None or missing, the default project_id from the GCP connection is used. :type project_id: str :param job_trigger: (Optional) The job trigger to create. :type job_trigger: dict or google.cloud.dlp_v2.types.JobTrigger :param trigger_id: (Optional) The job trigger ID. :type trigger_id: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. :type metadata: sequence[tuple[str, str]]] :rtype: google.cloud.dlp_v2.types.JobTrigger """ client = self.get_conn() parent = DlpServiceClient.project_path(project_id) return client.create_job_trigger( parent=parent, job_trigger=job_trigger, trigger_id=trigger_id, retry=retry, timeout=timeout, metadata=metadata,
) @GoogleCloudBaseHook.catch_http_exception
[docs] def create_stored_info_type( self, organization_id=None, project_id=None, config=None, stored_info_type_id=None, retry=None, timeout=None, metadata=None, ): """ Creates a pre-built stored info type to be used for inspection. :param organization_id: (Optional) The organization ID. Required to set this field if parent resource is an organzation. :type organization_id: str :param project_id: (Optional) Google Cloud Platform project ID where the DLP Instance exists. Only set this field if the parent resource is a project instead of an organzation. :type project_id: str :param config: (Optional) The config for the stored info type. :type config: dict or google.cloud.dlp_v2.types.StoredInfoTypeConfig :param stored_info_type_id: (Optional) The stored info type ID. :type stored_info_type_id: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. :type metadata: sequence[tuple[str, str]]] :rtype: google.cloud.dlp_v2.types.StoredInfoType """ client = self.get_conn() # Handle project_id from connection configuration project_id = project_id or self.project_id if organization_id: parent = DlpServiceClient.organization_path(organization_id) elif project_id: parent = DlpServiceClient.project_path(project_id) else: raise AirflowException( "Please provide either organization_id or project_id." ) return client.create_stored_info_type( parent=parent, config=config, stored_info_type_id=stored_info_type_id, retry=retry, timeout=timeout, metadata=metadata,
) @GoogleCloudBaseHook.catch_http_exception @GoogleCloudBaseHook.fallback_to_default_project_id
[docs] def deidentify_content( self, project_id=None, deidentify_config=None, inspect_config=None, item=None, inspect_template_name=None, deidentify_template_name=None, retry=None, timeout=None, metadata=None, ): """ De-identifies potentially sensitive info from a content item. This method has limits on input size and output size. :param project_id: (Optional) Google Cloud Platform project ID where the DLP Instance exists. If set to None or missing, the default project_id from the GCP connection is used. :type project_id: str :param deidentify_config: (Optional) Configuration for the de-identification of the content item. Items specified here will override the template referenced by the deidentify_template_name argument. :type deidentify_config: dict or google.cloud.dlp_v2.types.DeidentifyConfig :param inspect_config: (Optional) Configuration for the inspector. Items specified here will override the template referenced by the inspect_template_name argument. :type inspect_config: dict or google.cloud.dlp_v2.types.InspectConfig :param item: (Optional) The item to de-identify. Will be treated as text. :type item: dict or google.cloud.dlp_v2.types.ContentItem :param inspect_template_name: (Optional) Optional template to use. Any configuration directly specified in inspect_config will override those set in the template. :type inspect_template_name: str :param deidentify_template_name: (Optional) Optional template to use. Any configuration directly specified in deidentify_config will override those set in the template. :type deidentify_template_name: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. 
:type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. :type metadata: sequence[tuple[str, str]]] :rtype: google.cloud.dlp_v2.types.DeidentifyContentResponse """ client = self.get_conn() parent = DlpServiceClient.project_path(project_id) return client.deidentify_content( parent=parent, deidentify_config=deidentify_config, inspect_config=inspect_config, item=item, inspect_template_name=inspect_template_name, deidentify_template_name=deidentify_template_name, retry=retry, timeout=timeout, metadata=metadata,
) @GoogleCloudBaseHook.catch_http_exception
[docs] def delete_deidentify_template( self, template_id, organization_id=None, project_id=None, retry=None, timeout=None, metadata=None ): """ Deletes a deidentify template. :param template_id: The ID of deidentify template to be deleted. :type template_id: str :param organization_id: (Optional) The organization ID. Required to set this field if parent resource is an organzation. :type organization_id: str :param project_id: (Optional) Google Cloud Platform project ID where the DLP Instance exists. Only set this field if the parent resource is a project instead of an organzation. :type project_id: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. :type metadata: sequence[tuple[str, str]]] """ client = self.get_conn() if not template_id: raise AirflowException( "Please provide the ID of deidentify template to be deleted." ) # Handle project_id from connection configuration project_id = project_id or self.project_id if organization_id: name = DlpServiceClient.organization_deidentify_template_path(organization_id, template_id) elif project_id: name = DlpServiceClient.project_deidentify_template_path(project_id, template_id) else: raise AirflowException( "Please provide either organization_id or project_id." ) client.delete_deidentify_template( name=name, retry=retry, timeout=timeout, metadata=metadata
) @GoogleCloudBaseHook.catch_http_exception @GoogleCloudBaseHook.fallback_to_default_project_id
[docs] def delete_dlp_job( self, dlp_job_id, project_id=None, retry=None, timeout=None, metadata=None ): """ Deletes a long-running DLP job. This method indicates that the client is no longer interested in the DLP job result. The job will be cancelled if possible. :param dlp_job_id: The ID of the DLP job resource to be cancelled. :type dlp_job_id: str :param project_id: (Optional) Google Cloud Platform project ID where the DLP Instance exists. If set to None or missing, the default project_id from the GCP connection is used. :type project_id: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. :type metadata: sequence[tuple[str, str]]] """ client = self.get_conn() if not dlp_job_id: raise AirflowException( "Please provide the ID of the DLP job resource to be cancelled." ) name = DlpServiceClient.dlp_job_path(project_id, dlp_job_id) client.delete_dlp_job( name=name, retry=retry, timeout=timeout, metadata=metadata
) @GoogleCloudBaseHook.catch_http_exception
[docs] def delete_inspect_template( self, template_id, organization_id=None, project_id=None, retry=None, timeout=None, metadata=None ): """ Deletes an inspect template. :param template_id: The ID of the inspect template to be deleted. :type template_id: str :param organization_id: (Optional) The organization ID. Required to set this field if parent resource is an organzation. :type organization_id: str :param project_id: (Optional) Google Cloud Platform project ID where the DLP Instance exists. Only set this field if the parent resource is a project instead of an organzation. :type project_id: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. :type metadata: sequence[tuple[str, str]]] """ client = self.get_conn() if not template_id: raise AirflowException( "Please provide the ID of the inspect template to be deleted." ) # Handle project_id from connection configuration project_id = project_id or self.project_id if organization_id: name = DlpServiceClient.organization_inspect_template_path(organization_id, template_id) elif project_id: name = DlpServiceClient.project_inspect_template_path(project_id, template_id) else: raise AirflowException( "Please provide either organization_id or project_id." ) client.delete_inspect_template( name=name, retry=retry, timeout=timeout, metadata=metadata
) @GoogleCloudBaseHook.catch_http_exception @GoogleCloudBaseHook.fallback_to_default_project_id
[docs] def delete_job_trigger( self, job_trigger_id, project_id=None, retry=None, timeout=None, metadata=None ): """ Deletes a job trigger. :param job_trigger_id: The ID of the DLP job trigger to be deleted. :type job_trigger_id: str :param project_id: (Optional) Google Cloud Platform project ID where the DLP Instance exists. If set to None or missing, the default project_id from the GCP connection is used. :type project_id: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. :type metadata: sequence[tuple[str, str]]] """ client = self.get_conn() if not job_trigger_id: raise AirflowException( "Please provide the ID of the DLP job trigger to be deleted." ) name = DlpServiceClient.project_job_trigger_path(project_id, job_trigger_id) client.delete_job_trigger( name=name, retry=retry, timeout=timeout, metadata=metadata
) @GoogleCloudBaseHook.catch_http_exception
[docs] def delete_stored_info_type( self, stored_info_type_id, organization_id=None, project_id=None, retry=None, timeout=None, metadata=None ): """ Deletes a stored info type. :param stored_info_type_id: The ID of the stored info type to be deleted. :type stored_info_type_id: str :param organization_id: (Optional) The organization ID. Required to set this field if parent resource is an organzation. :type organization_id: str :param project_id: (Optional) Google Cloud Platform project ID where the DLP Instance exists. Only set this field if the parent resource is a project instead of an organzation. :type project_id: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. :type metadata: sequence[tuple[str, str]]] """ client = self.get_conn() if not stored_info_type_id: raise AirflowException( "Please provide the ID of the stored info type to be deleted." ) # Handle project_id from connection configuration project_id = project_id or self.project_id if organization_id: name = DlpServiceClient.organization_stored_info_type_path( organization_id, stored_info_type_id ) elif project_id: name = DlpServiceClient.project_stored_info_type_path( project_id, stored_info_type_id ) else: raise AirflowException( "Please provide either organization_id or project_id." ) client.delete_stored_info_type( name=name, retry=retry, timeout=timeout, metadata=metadata
) @GoogleCloudBaseHook.catch_http_exception
[docs] def get_deidentify_template( self, template_id, organization_id=None, project_id=None, retry=None, timeout=None, metadata=None ): """ Gets a deidentify template. :param template_id: The ID of deidentify template to be read. :type template_id: str :param organization_id: (Optional) The organization ID. Required to set this field if parent resource is an organzation. :type organization_id: str :param project_id: (Optional) Google Cloud Platform project ID where the DLP Instance exists. Only set this field if the parent resource is a project instead of an organzation. :type project_id: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. :type metadata: sequence[tuple[str, str]]] :rtype: google.cloud.dlp_v2.types.DeidentifyTemplate """ client = self.get_conn() if not template_id: raise AirflowException( "Please provide the ID of the deidentify template to be read." ) # Handle project_id from connection configuration project_id = project_id or self.project_id if organization_id: name = DlpServiceClient.organization_deidentify_template_path(organization_id, template_id) elif project_id: name = DlpServiceClient.project_deidentify_template_path(project_id, template_id) else: raise AirflowException( "Please provide either organization_id or project_id." ) return client.get_deidentify_template( name=name, retry=retry, timeout=timeout, metadata=metadata
) @GoogleCloudBaseHook.catch_http_exception @GoogleCloudBaseHook.fallback_to_default_project_id
[docs] def get_dlp_job( self, dlp_job_id, project_id=None, retry=None, timeout=None, metadata=None ): """ Gets the latest state of a long-running Dlp Job. :param dlp_job_id: The ID of the DLP job resource to be read. :type dlp_job_id: str :param project_id: (Optional) Google Cloud Platform project ID where the DLP Instance exists. If set to None or missing, the default project_id from the GCP connection is used. :type project_id: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. :type metadata: sequence[tuple[str, str]]] :rtype: google.cloud.dlp_v2.types.DlpJob """ client = self.get_conn() if not dlp_job_id: raise AirflowException( "Please provide the ID of the DLP job resource to be read." ) name = DlpServiceClient.dlp_job_path(project_id, dlp_job_id) return client.get_dlp_job( name=name, retry=retry, timeout=timeout, metadata=metadata
) @GoogleCloudBaseHook.catch_http_exception
[docs] def get_inspect_template( self, template_id, organization_id=None, project_id=None, retry=None, timeout=None, metadata=None ): """ Gets an inspect template. :param template_id: The ID of inspect template to be read. :type template_id: str :param organization_id: (Optional) The organization ID. Required to set this field if parent resource is an organzation. :type organization_id: str :param project_id: (Optional) Google Cloud Platform project ID where the DLP Instance exists. Only set this field if the parent resource is a project instead of an organzation. :type project_id: str :param retry: (Optional) A retry object used to retry requests. If None is specified, requests will not be retried. :type retry: google.api_core.retry.Retry :param timeout: (Optional) The amount of time, in seconds, to wait for the request to complete. Note that if retry is specified, the timeout applies to each individual attempt. :type timeout: float :param metadata: (Optional) Additional metadata that is provided to the method. :type metadata: sequence[tuple[str, str]]] :rtype: google.cloud.dlp_v2.types.InspectTemplate """ client = self.get_conn() if not template_id: raise AirflowException( "Please provide the ID of the inspect template to be read." ) # Handle project_id from connection configuration project_id = project_id or self.project_id if organization_id: name = DlpServiceClient.organization_inspect_template_path(organization_id, template_id) elif project_id: name = DlpServiceClient.project_inspect_template_path(project_id, template_id) else: raise AirflowException( "Please provide either organization_id or project_id." ) return client.get_inspect_template( name=name, retry=retry, timeout=timeout, metadata=metadata
) @GoogleCloudBaseHook.catch_http_exception @GoogleCloudBaseHook.fallback_to_default_project_id
def get_job_trigger(
    self, job_trigger_id, project_id=None, retry=None, timeout=None, metadata=None
):
    """
    Reads a single DLP job trigger.

    :param job_trigger_id: ID of the DLP job trigger to read.
    :type job_trigger_id: str
    :param project_id: (Optional) ID of the Google Cloud Platform project
        where the DLP Instance exists. If None or missing, the default
        project_id from the GCP connection is used.
    :type project_id: str
    :param retry: (Optional) Retry object used to retry requests. When None,
        requests are not retried.
    :type retry: google.api_core.retry.Retry
    :param timeout: (Optional) Number of seconds to wait for the request to
        complete. When retry is given, the timeout applies to each
        individual attempt.
    :type timeout: float
    :param metadata: (Optional) Additional metadata passed to the method.
    :type metadata: sequence[tuple[str, str]]
    :rtype: google.cloud.dlp_v2.types.JobTrigger
    :raises AirflowException: if ``job_trigger_id`` is empty.
    """
    # The connection is obtained before validation, as callers may rely on it.
    dlp_client = self.get_conn()

    if not job_trigger_id:
        raise AirflowException(
            "Please provide the ID of the DLP job trigger to be read."
        )

    call_options = dict(retry=retry, timeout=timeout, metadata=metadata)
    trigger_path = DlpServiceClient.project_job_trigger_path(
        project_id, job_trigger_id
    )
    return dlp_client.get_job_trigger(name=trigger_path, **call_options)


@GoogleCloudBaseHook.catch_http_exception
def get_stored_info_type(
    self,
    stored_info_type_id,
    organization_id=None,
    project_id=None,
    retry=None,
    timeout=None,
    metadata=None
):
    """
    Reads a single stored info type.

    The resource is looked up under the organization when ``organization_id``
    is given; otherwise under the project (explicit ``project_id`` or the one
    taken from the connection).

    :param stored_info_type_id: ID of the stored info type to read.
    :type stored_info_type_id: str
    :param organization_id: (Optional) The organization ID. Must be set when
        the parent resource is an organization.
    :type organization_id: str
    :param project_id: (Optional) ID of the Google Cloud Platform project
        where the DLP Instance exists. Only set this when the parent resource
        is a project rather than an organization.
    :type project_id: str
    :param retry: (Optional) Retry object used to retry requests. When None,
        requests are not retried.
    :type retry: google.api_core.retry.Retry
    :param timeout: (Optional) Number of seconds to wait for the request to
        complete. When retry is given, the timeout applies to each
        individual attempt.
    :type timeout: float
    :param metadata: (Optional) Additional metadata passed to the method.
    :type metadata: sequence[tuple[str, str]]
    :rtype: google.cloud.dlp_v2.types.StoredInfoType
    :raises AirflowException: if ``stored_info_type_id`` is empty, or if
        neither an organization nor a project can be determined.
    """
    dlp_client = self.get_conn()

    if not stored_info_type_id:
        raise AirflowException(
            "Please provide the ID of the stored info type to be read."
        )

    # Fall back to the project configured on the connection.
    effective_project_id = project_id or self.project_id

    if not (organization_id or effective_project_id):
        raise AirflowException(
            "Please provide either organization_id or project_id."
        )

    # The organization takes precedence over the project when both are known.
    if organization_id:
        resource_name = DlpServiceClient.organization_stored_info_type_path(
            organization_id, stored_info_type_id
        )
    else:
        resource_name = DlpServiceClient.project_stored_info_type_path(
            effective_project_id, stored_info_type_id
        )

    return dlp_client.get_stored_info_type(
        name=resource_name, retry=retry, timeout=timeout, metadata=metadata
    )


@GoogleCloudBaseHook.catch_http_exception
@GoogleCloudBaseHook.fallback_to_default_project_id
def inspect_content(
    self,
    project_id=None,
    inspect_config=None,
    item=None,
    inspect_template_name=None,
    retry=None,
    timeout=None,
    metadata=None
):
    """
    Looks for potentially sensitive info in content.

    This method has limits on input size, processing time, and output size.

    :param project_id: (Optional) ID of the Google Cloud Platform project
        where the DLP Instance exists. If None or missing, the default
        project_id from the GCP connection is used.
    :type project_id: str
    :param inspect_config: (Optional) Configuration for the inspector. Items
        given here override the template referenced by
        ``inspect_template_name``.
    :type inspect_config: dict or google.cloud.dlp_v2.types.InspectConfig
    :param item: (Optional) The item to inspect. Will be treated as text.
    :type item: dict or google.cloud.dlp_v2.types.ContentItem
    :param inspect_template_name: (Optional) Template to use. Any
        configuration given directly in ``inspect_config`` overrides the
        values set in the template.
    :type inspect_template_name: str
    :param retry: (Optional) Retry object used to retry requests. When None,
        requests are not retried.
    :type retry: google.api_core.retry.Retry
    :param timeout: (Optional) Number of seconds to wait for the request to
        complete. When retry is given, the timeout applies to each
        individual attempt.
    :type timeout: float
    :param metadata: (Optional) Additional metadata passed to the method.
    :type metadata: sequence[tuple[str, str]]
    :rtype: google.cloud.dlp_v2.types.InspectContentResponse
    """
    dlp_client = self.get_conn()
    call_options = dict(retry=retry, timeout=timeout, metadata=metadata)

    return dlp_client.inspect_content(
        parent=DlpServiceClient.project_path(project_id),
        inspect_config=inspect_config,
        item=item,
        inspect_template_name=inspect_template_name,
        **call_options
    )


@GoogleCloudBaseHook.catch_http_exception
def list_deidentify_templates(
    self,
    organization_id=None,
    project_id=None,
    page_size=None,
    order_by=None,
    retry=None,
    timeout=None,
    metadata=None
):
    """
    Lists deidentify templates of an organization or a project.

    :param organization_id: (Optional) The organization ID. Must be set when
        the parent resource is an organization.
    :type organization_id: str
    :param project_id: (Optional) ID of the Google Cloud Platform project
        where the DLP Instance exists. Only set this when the parent resource
        is a project rather than an organization.
    :type project_id: str
    :param page_size: (Optional) Maximum number of resources contained in the
        underlying API response.
    :type page_size: int
    :param order_by: (Optional) Comma separated list of fields to order by,
        followed by asc or desc postfix.
    :type order_by: str
    :param retry: (Optional) Retry object used to retry requests. When None,
        requests are not retried.
    :type retry: google.api_core.retry.Retry
    :param timeout: (Optional) Number of seconds to wait for the request to
        complete. When retry is given, the timeout applies to each
        individual attempt.
    :type timeout: float
    :param metadata: (Optional) Additional metadata passed to the method.
    :type metadata: sequence[tuple[str, str]]
    :rtype: list[google.cloud.dlp_v2.types.DeidentifyTemplate]
    :raises AirflowException: if neither an organization nor a project can
        be determined.
    """
    dlp_client = self.get_conn()

    # Fall back to the project configured on the connection.
    effective_project_id = project_id or self.project_id

    if not (organization_id or effective_project_id):
        raise AirflowException(
            "Please provide either organization_id or project_id."
        )

    # The organization takes precedence over the project when both are known.
    if organization_id:
        parent_path = DlpServiceClient.organization_path(organization_id)
    else:
        parent_path = DlpServiceClient.project_path(effective_project_id)

    # Materialize the iterable returned by the client into a plain list.
    return list(
        dlp_client.list_deidentify_templates(
            parent=parent_path,
            page_size=page_size,
            order_by=order_by,
            retry=retry,
            timeout=timeout,
            metadata=metadata
        )
    )
@GoogleCloudBaseHook.catch_http_exception
@GoogleCloudBaseHook.fallback_to_default_project_id
def list_dlp_jobs(
    self,
    project_id=None,
    results_filter=None,
    page_size=None,
    job_type=None,
    order_by=None,
    retry=None,
    timeout=None,
    metadata=None
):
    """
    Lists the DLP jobs of a project that match the given filter.

    :param project_id: (Optional) ID of the Google Cloud Platform project
        where the DLP Instance exists. If None or missing, the default
        project_id from the GCP connection is used.
    :type project_id: str
    :param results_filter: (Optional) Filter used to select a subset of
        results.
    :type results_filter: str
    :param page_size: (Optional) Maximum number of resources contained in the
        underlying API response.
    :type page_size: int
    :param job_type: (Optional) The type of job.
    :type job_type: str
    :param order_by: (Optional) Comma separated list of fields to order by,
        followed by asc or desc postfix.
    :type order_by: str
    :param retry: (Optional) Retry object used to retry requests. When None,
        requests are not retried.
    :type retry: google.api_core.retry.Retry
    :param timeout: (Optional) Number of seconds to wait for the request to
        complete. When retry is given, the timeout applies to each
        individual attempt.
    :type timeout: float
    :param metadata: (Optional) Additional metadata passed to the method.
    :type metadata: sequence[tuple[str, str]]
    :rtype: list[google.cloud.dlp_v2.types.DlpJob]
    """
    dlp_client = self.get_conn()
    call_options = dict(retry=retry, timeout=timeout, metadata=metadata)

    # The client expects the filter and type under the names
    # ``filter_`` and ``type_``.
    job_iterator = dlp_client.list_dlp_jobs(
        parent=DlpServiceClient.project_path(project_id),
        filter_=results_filter,
        page_size=page_size,
        type_=job_type,
        order_by=order_by,
        **call_options
    )
    # Materialize the iterable returned by the client into a plain list.
    return list(job_iterator)
@GoogleCloudBaseHook.catch_http_exception
def list_info_types(
    self,
    language_code=None,
    results_filter=None,
    retry=None,
    timeout=None,
    metadata=None
):
    """
    Returns the sensitive information types supported by the DLP API.

    :param language_code: (Optional) BCP-47 language code for localized info
        type friendly names. If omitted, or if localized strings are not
        available, en-US strings will be returned.
    :type language_code: str
    :param results_filter: (Optional) Filter used to select a subset of
        results.
    :type results_filter: str
    :param retry: (Optional) Retry object used to retry requests. When None,
        requests are not retried.
    :type retry: google.api_core.retry.Retry
    :param timeout: (Optional) Number of seconds to wait for the request to
        complete. When retry is given, the timeout applies to each
        individual attempt.
    :type timeout: float
    :param metadata: (Optional) Additional metadata passed to the method.
    :type metadata: sequence[tuple[str, str]]
    :rtype: google.cloud.dlp_v2.types.ListInfoTypesResponse
    """
    dlp_client = self.get_conn()
    call_options = dict(retry=retry, timeout=timeout, metadata=metadata)

    # The client expects the filter under the name ``filter_``.
    return dlp_client.list_info_types(
        language_code=language_code, filter_=results_filter, **call_options
    )


@GoogleCloudBaseHook.catch_http_exception
def list_inspect_templates(
    self,
    organization_id=None,
    project_id=None,
    page_size=None,
    order_by=None,
    retry=None,
    timeout=None,
    metadata=None
):
    """
    Lists inspect templates of an organization or a project.

    :param organization_id: (Optional) The organization ID. Must be set when
        the parent resource is an organization.
    :type organization_id: str
    :param project_id: (Optional) ID of the Google Cloud Platform project
        where the DLP Instance exists. Only set this when the parent resource
        is a project rather than an organization.
    :type project_id: str
    :param page_size: (Optional) Maximum number of resources contained in the
        underlying API response.
    :type page_size: int
    :param order_by: (Optional) Comma separated list of fields to order by,
        followed by asc or desc postfix.
    :type order_by: str
    :param retry: (Optional) Retry object used to retry requests. When None,
        requests are not retried.
    :type retry: google.api_core.retry.Retry
    :param timeout: (Optional) Number of seconds to wait for the request to
        complete. When retry is given, the timeout applies to each
        individual attempt.
    :type timeout: float
    :param metadata: (Optional) Additional metadata passed to the method.
    :type metadata: sequence[tuple[str, str]]
    :rtype: list[google.cloud.dlp_v2.types.InspectTemplate]
    :raises AirflowException: if neither an organization nor a project can
        be determined.
    """
    dlp_client = self.get_conn()

    # Fall back to the project configured on the connection.
    effective_project_id = project_id or self.project_id

    if not (organization_id or effective_project_id):
        raise AirflowException(
            "Please provide either organization_id or project_id."
        )

    # The organization takes precedence over the project when both are known.
    if organization_id:
        parent_path = DlpServiceClient.organization_path(organization_id)
    else:
        parent_path = DlpServiceClient.project_path(effective_project_id)

    # Materialize the iterable returned by the client into a plain list.
    return list(
        dlp_client.list_inspect_templates(
            parent=parent_path,
            page_size=page_size,
            order_by=order_by,
            retry=retry,
            timeout=timeout,
            metadata=metadata
        )
    )
@GoogleCloudBaseHook.catch_http_exception
@GoogleCloudBaseHook.fallback_to_default_project_id
def list_job_triggers(
    self,
    project_id=None,
    page_size=None,
    order_by=None,
    results_filter=None,
    retry=None,
    timeout=None,
    metadata=None
):
    """
    Lists the job triggers of a project.

    :param project_id: (Optional) ID of the Google Cloud Platform project
        where the DLP Instance exists. If None or missing, the default
        project_id from the GCP connection is used.
    :type project_id: str
    :param page_size: (Optional) Maximum number of resources contained in the
        underlying API response.
    :type page_size: int
    :param order_by: (Optional) Comma separated list of fields to order by,
        followed by asc or desc postfix.
    :type order_by: str
    :param results_filter: (Optional) Filter used to select a subset of
        results.
    :type results_filter: str
    :param retry: (Optional) Retry object used to retry requests. When None,
        requests are not retried.
    :type retry: google.api_core.retry.Retry
    :param timeout: (Optional) Number of seconds to wait for the request to
        complete. When retry is given, the timeout applies to each
        individual attempt.
    :type timeout: float
    :param metadata: (Optional) Additional metadata passed to the method.
    :type metadata: sequence[tuple[str, str]]
    :rtype: list[google.cloud.dlp_v2.types.JobTrigger]
    """
    dlp_client = self.get_conn()
    call_options = dict(retry=retry, timeout=timeout, metadata=metadata)

    # The client expects the filter under the name ``filter_``.
    trigger_iterator = dlp_client.list_job_triggers(
        parent=DlpServiceClient.project_path(project_id),
        page_size=page_size,
        order_by=order_by,
        filter_=results_filter,
        **call_options
    )
    # Materialize the iterable returned by the client into a plain list.
    return list(trigger_iterator)
@GoogleCloudBaseHook.catch_http_exception
def list_stored_info_types(
    self,
    organization_id=None,
    project_id=None,
    page_size=None,
    order_by=None,
    retry=None,
    timeout=None,
    metadata=None
):
    """
    Lists stored info types of an organization or a project.

    :param organization_id: (Optional) The organization ID. Must be set when
        the parent resource is an organization.
    :type organization_id: str
    :param project_id: (Optional) ID of the Google Cloud Platform project
        where the DLP Instance exists. Only set this when the parent resource
        is a project rather than an organization.
    :type project_id: str
    :param page_size: (Optional) Maximum number of resources contained in the
        underlying API response.
    :type page_size: int
    :param order_by: (Optional) Comma separated list of fields to order by,
        followed by asc or desc postfix.
    :type order_by: str
    :param retry: (Optional) Retry object used to retry requests. When None,
        requests are not retried.
    :type retry: google.api_core.retry.Retry
    :param timeout: (Optional) Number of seconds to wait for the request to
        complete. When retry is given, the timeout applies to each
        individual attempt.
    :type timeout: float
    :param metadata: (Optional) Additional metadata passed to the method.
    :type metadata: sequence[tuple[str, str]]
    :rtype: list[google.cloud.dlp_v2.types.StoredInfoType]
    :raises AirflowException: if neither an organization nor a project can
        be determined.
    """
    dlp_client = self.get_conn()

    # Fall back to the project configured on the connection.
    effective_project_id = project_id or self.project_id

    if not (organization_id or effective_project_id):
        raise AirflowException(
            "Please provide either organization_id or project_id."
        )

    # The organization takes precedence over the project when both are known.
    if organization_id:
        parent_path = DlpServiceClient.organization_path(organization_id)
    else:
        parent_path = DlpServiceClient.project_path(effective_project_id)

    # Materialize the iterable returned by the client into a plain list.
    return list(
        dlp_client.list_stored_info_types(
            parent=parent_path,
            page_size=page_size,
            order_by=order_by,
            retry=retry,
            timeout=timeout,
            metadata=metadata
        )
    )
@GoogleCloudBaseHook.catch_http_exception
@GoogleCloudBaseHook.fallback_to_default_project_id
def redact_image(
    self,
    project_id=None,
    inspect_config=None,
    image_redaction_configs=None,
    include_findings=None,
    byte_item=None,
    retry=None,
    timeout=None,
    metadata=None
):
    """
    Redacts potentially sensitive info from an image.

    This method has limits on input size, processing time, and output size.

    :param project_id: (Optional) ID of the Google Cloud Platform project
        where the DLP Instance exists. If None or missing, the default
        project_id from the GCP connection is used.
    :type project_id: str
    :param inspect_config: (Optional) Configuration for the inspector. Items
        given here override the template referenced by the
        inspect_template_name argument.
    :type inspect_config: dict or google.cloud.dlp_v2.types.InspectConfig
    :param image_redaction_configs: (Optional) Configuration describing what
        content to redact from images.
    :type image_redaction_configs: list[dict] or
        list[google.cloud.dlp_v2.types.ImageRedactionConfig]
    :param include_findings: (Optional) Whether the response should include
        findings along with the redacted image.
    :type include_findings: bool
    :param byte_item: (Optional) The content must be PNG, JPEG, SVG or BMP.
    :type byte_item: dict or google.cloud.dlp_v2.types.ByteContentItem
    :param retry: (Optional) Retry object used to retry requests. When None,
        requests are not retried.
    :type retry: google.api_core.retry.Retry
    :param timeout: (Optional) Number of seconds to wait for the request to
        complete. When retry is given, the timeout applies to each
        individual attempt.
    :type timeout: float
    :param metadata: (Optional) Additional metadata passed to the method.
    :type metadata: sequence[tuple[str, str]]
    :rtype: google.cloud.dlp_v2.types.RedactImageResponse
    """
    dlp_client = self.get_conn()
    call_options = dict(retry=retry, timeout=timeout, metadata=metadata)

    return dlp_client.redact_image(
        parent=DlpServiceClient.project_path(project_id),
        inspect_config=inspect_config,
        image_redaction_configs=image_redaction_configs,
        include_findings=include_findings,
        byte_item=byte_item,
        **call_options
    )


@GoogleCloudBaseHook.catch_http_exception
@GoogleCloudBaseHook.fallback_to_default_project_id
def reidentify_content(
    self,
    project_id=None,
    reidentify_config=None,
    inspect_config=None,
    item=None,
    inspect_template_name=None,
    reidentify_template_name=None,
    retry=None,
    timeout=None,
    metadata=None
):
    """
    Re-identifies content that has been de-identified.

    :param project_id: (Optional) ID of the Google Cloud Platform project
        where the DLP Instance exists. If None or missing, the default
        project_id from the GCP connection is used.
    :type project_id: str
    :param reidentify_config: (Optional) Configuration for the
        re-identification of the content item.
    :type reidentify_config: dict or google.cloud.dlp_v2.types.DeidentifyConfig
    :param inspect_config: (Optional) Configuration for the inspector.
    :type inspect_config: dict or google.cloud.dlp_v2.types.InspectConfig
    :param item: (Optional) The item to re-identify. Will be treated as text.
    :type item: dict or google.cloud.dlp_v2.types.ContentItem
    :param inspect_template_name: (Optional) Template to use. Any
        configuration given directly in ``inspect_config`` overrides the
        values set in the template.
    :type inspect_template_name: str
    :param reidentify_template_name: (Optional) Template to use. References
        an instance of deidentify template. Any configuration given directly
        in ``reidentify_config`` or ``inspect_config`` overrides the values
        set in the template.
    :type reidentify_template_name: str
    :param retry: (Optional) Retry object used to retry requests. When None,
        requests are not retried.
    :type retry: google.api_core.retry.Retry
    :param timeout: (Optional) Number of seconds to wait for the request to
        complete. When retry is given, the timeout applies to each
        individual attempt.
    :type timeout: float
    :param metadata: (Optional) Additional metadata passed to the method.
    :type metadata: sequence[tuple[str, str]]
    :rtype: google.cloud.dlp_v2.types.ReidentifyContentResponse
    """
    dlp_client = self.get_conn()
    call_options = dict(retry=retry, timeout=timeout, metadata=metadata)

    return dlp_client.reidentify_content(
        parent=DlpServiceClient.project_path(project_id),
        reidentify_config=reidentify_config,
        inspect_config=inspect_config,
        item=item,
        inspect_template_name=inspect_template_name,
        reidentify_template_name=reidentify_template_name,
        **call_options
    )


@GoogleCloudBaseHook.catch_http_exception
def update_deidentify_template(
    self,
    template_id,
    organization_id=None,
    project_id=None,
    deidentify_template=None,
    update_mask=None,
    retry=None,
    timeout=None,
    metadata=None
):
    """
    Updates a deidentify template of an organization or a project.

    :param template_id: ID of the deidentify template to update.
    :type template_id: str
    :param organization_id: (Optional) The organization ID. Must be set when
        the parent resource is an organization.
    :type organization_id: str
    :param project_id: (Optional) ID of the Google Cloud Platform project
        where the DLP Instance exists. Only set this when the parent resource
        is a project rather than an organization.
    :type project_id: str
    :param deidentify_template: New deidentify template value.
    :type deidentify_template: dict or google.cloud.dlp_v2.types.DeidentifyTemplate
    :param update_mask: Mask to control which fields get updated.
    :type update_mask: dict or google.cloud.dlp_v2.types.FieldMask
    :param retry: (Optional) Retry object used to retry requests. When None,
        requests are not retried.
    :type retry: google.api_core.retry.Retry
    :param timeout: (Optional) Number of seconds to wait for the request to
        complete. When retry is given, the timeout applies to each
        individual attempt.
    :type timeout: float
    :param metadata: (Optional) Additional metadata passed to the method.
    :type metadata: sequence[tuple[str, str]]
    :rtype: google.cloud.dlp_v2.types.DeidentifyTemplate
    :raises AirflowException: if ``template_id`` is empty, or if neither an
        organization nor a project can be determined.
    """
    dlp_client = self.get_conn()

    if not template_id:
        raise AirflowException(
            "Please provide the ID of deidentify template to be updated."
        )

    # Fall back to the project configured on the connection.
    effective_project_id = project_id or self.project_id

    if not (organization_id or effective_project_id):
        raise AirflowException(
            "Please provide either organization_id or project_id."
        )

    # The organization takes precedence over the project when both are known.
    if organization_id:
        resource_name = DlpServiceClient.organization_deidentify_template_path(
            organization_id, template_id
        )
    else:
        resource_name = DlpServiceClient.project_deidentify_template_path(
            effective_project_id, template_id
        )

    return dlp_client.update_deidentify_template(
        name=resource_name,
        deidentify_template=deidentify_template,
        update_mask=update_mask,
        retry=retry,
        timeout=timeout,
        metadata=metadata
    )


@GoogleCloudBaseHook.catch_http_exception
def update_inspect_template(
    self,
    template_id,
    organization_id=None,
    project_id=None,
    inspect_template=None,
    update_mask=None,
    retry=None,
    timeout=None,
    metadata=None
):
    """
    Updates an inspect template of an organization or a project.

    :param template_id: ID of the inspect template to update.
    :type template_id: str
    :param organization_id: (Optional) The organization ID. Must be set when
        the parent resource is an organization.
    :type organization_id: str
    :param project_id: (Optional) ID of the Google Cloud Platform project
        where the DLP Instance exists. Only set this when the parent resource
        is a project rather than an organization.
    :type project_id: str
    :param inspect_template: New inspect template value.
    :type inspect_template: dict or google.cloud.dlp_v2.types.InspectTemplate
    :param update_mask: Mask to control which fields get updated.
    :type update_mask: dict or google.cloud.dlp_v2.types.FieldMask
    :param retry: (Optional) Retry object used to retry requests. When None,
        requests are not retried.
    :type retry: google.api_core.retry.Retry
    :param timeout: (Optional) Number of seconds to wait for the request to
        complete. When retry is given, the timeout applies to each
        individual attempt.
    :type timeout: float
    :param metadata: (Optional) Additional metadata passed to the method.
    :type metadata: sequence[tuple[str, str]]
    :rtype: google.cloud.dlp_v2.types.InspectTemplate
    :raises AirflowException: if ``template_id`` is empty, or if neither an
        organization nor a project can be determined.
    """
    dlp_client = self.get_conn()

    if not template_id:
        raise AirflowException(
            "Please provide the ID of the inspect template to be updated."
        )

    # Fall back to the project configured on the connection.
    effective_project_id = project_id or self.project_id

    if not (organization_id or effective_project_id):
        raise AirflowException(
            "Please provide either organization_id or project_id."
        )

    # The organization takes precedence over the project when both are known.
    if organization_id:
        resource_name = DlpServiceClient.organization_inspect_template_path(
            organization_id, template_id
        )
    else:
        resource_name = DlpServiceClient.project_inspect_template_path(
            effective_project_id, template_id
        )

    return dlp_client.update_inspect_template(
        name=resource_name,
        inspect_template=inspect_template,
        update_mask=update_mask,
        retry=retry,
        timeout=timeout,
        metadata=metadata
    )


@GoogleCloudBaseHook.catch_http_exception
@GoogleCloudBaseHook.fallback_to_default_project_id
def update_job_trigger(
    self,
    job_trigger_id,
    project_id=None,
    job_trigger=None,
    update_mask=None,
    retry=None,
    timeout=None,
    metadata=None
):
    """
    Updates a job trigger of a project.

    :param job_trigger_id: ID of the DLP job trigger to update.
    :type job_trigger_id: str
    :param project_id: (Optional) ID of the Google Cloud Platform project
        where the DLP Instance exists. If None or missing, the default
        project_id from the GCP connection is used.
    :type project_id: str
    :param job_trigger: New job trigger value.
    :type job_trigger: dict or google.cloud.dlp_v2.types.JobTrigger
    :param update_mask: Mask to control which fields get updated.
    :type update_mask: dict or google.cloud.dlp_v2.types.FieldMask
    :param retry: (Optional) Retry object used to retry requests. When None,
        requests are not retried.
    :type retry: google.api_core.retry.Retry
    :param timeout: (Optional) Number of seconds to wait for the request to
        complete. When retry is given, the timeout applies to each
        individual attempt.
    :type timeout: float
    :param metadata: (Optional) Additional metadata passed to the method.
    :type metadata: sequence[tuple[str, str]]
    :rtype: google.cloud.dlp_v2.types.JobTrigger
    :raises AirflowException: if ``job_trigger_id`` is empty.
    """
    dlp_client = self.get_conn()

    if not job_trigger_id:
        raise AirflowException(
            "Please provide the ID of the DLP job trigger to be updated."
        )

    call_options = dict(retry=retry, timeout=timeout, metadata=metadata)
    trigger_path = DlpServiceClient.project_job_trigger_path(
        project_id, job_trigger_id
    )
    return dlp_client.update_job_trigger(
        name=trigger_path,
        job_trigger=job_trigger,
        update_mask=update_mask,
        **call_options
    )


@GoogleCloudBaseHook.catch_http_exception
def update_stored_info_type(
    self,
    stored_info_type_id,
    organization_id=None,
    project_id=None,
    config=None,
    update_mask=None,
    retry=None,
    timeout=None,
    metadata=None
):
    """
    Updates a stored info type by creating a new version of it.

    :param stored_info_type_id: ID of the stored info type to update.
    :type stored_info_type_id: str
    :param organization_id: (Optional) The organization ID. Must be set when
        the parent resource is an organization.
    :type organization_id: str
    :param project_id: (Optional) ID of the Google Cloud Platform project
        where the DLP Instance exists. Only set this when the parent resource
        is a project rather than an organization.
    :type project_id: str
    :param config: Updated configuration for the stored info type. If not
        provided, a new version of the stored info type will be created with
        the existing configuration.
    :type config: dict or google.cloud.dlp_v2.types.StoredInfoTypeConfig
    :param update_mask: Mask to control which fields get updated.
    :type update_mask: dict or google.cloud.dlp_v2.types.FieldMask
    :param retry: (Optional) Retry object used to retry requests. When None,
        requests are not retried.
    :type retry: google.api_core.retry.Retry
    :param timeout: (Optional) Number of seconds to wait for the request to
        complete. When retry is given, the timeout applies to each
        individual attempt.
    :type timeout: float
    :param metadata: (Optional) Additional metadata passed to the method.
    :type metadata: sequence[tuple[str, str]]
    :rtype: google.cloud.dlp_v2.types.StoredInfoType
    :raises AirflowException: if ``stored_info_type_id`` is empty, or if
        neither an organization nor a project can be determined.
    """
    dlp_client = self.get_conn()

    if not stored_info_type_id:
        raise AirflowException(
            "Please provide the ID of the stored info type to be updated."
        )

    # Fall back to the project configured on the connection.
    effective_project_id = project_id or self.project_id

    if not (organization_id or effective_project_id):
        raise AirflowException(
            "Please provide either organization_id or project_id."
        )

    # The organization takes precedence over the project when both are known.
    if organization_id:
        resource_name = DlpServiceClient.organization_stored_info_type_path(
            organization_id, stored_info_type_id
        )
    else:
        resource_name = DlpServiceClient.project_stored_info_type_path(
            effective_project_id, stored_info_type_id
        )

    return dlp_client.update_stored_info_type(
        name=resource_name,
        config=config,
        update_mask=update_mask,
        retry=retry,
        timeout=timeout,
        metadata=metadata
    )