Source code for airflow.providers.google.cloud.operators.workflows
# Licensed to the Apache Software Foundation (ASF) under one# or more contributor license agreements. See the NOTICE file# distributed with this work for additional information# regarding copyright ownership. The ASF licenses this file# to you under the Apache License, Version 2.0 (the# "License"); you may not use this file except in compliance# with the License. You may obtain a copy of the License at## http://www.apache.org/licenses/LICENSE-2.0## Unless required by applicable law or agreed to in writing,# software distributed under the License is distributed on an# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY# KIND, either express or implied. See the License for the# specific language governing permissions and limitations# under the License.importhashlibimportjsonimportreimportuuidfromdatetimeimportdatetime,timedeltafromtypingimportDict,Optional,Sequence,Tuple,Unionimportpytzfromgoogle.api_core.exceptionsimportAlreadyExistsfromgoogle.api_core.retryimportRetryfromgoogle.cloud.workflows.executions_v1betaimportExecutionfromgoogle.cloud.workflows_v1betaimportWorkflowfromgoogle.protobuf.field_mask_pb2importFieldMaskfromairflow.modelsimportBaseOperatorfromairflow.providers.google.cloud.hooks.workflowsimportWorkflowsHook
class WorkflowsCreateWorkflowOperator(BaseOperator):
    """
    Creates a new workflow.

    If a workflow with the specified name already exists in the specified
    project and location, the long running operation will return
    [ALREADY_EXISTS][google.rpc.Code.ALREADY_EXISTS] error.

    .. seealso::
        For more information on how to use this operator, take a look at the guide:
        :ref:`howto/operator:WorkflowsCreateWorkflowOperator`

    :param workflow: Required. Workflow to be created.
    :type workflow: Dict
    :param workflow_id: Required. The ID of the workflow to be created. When it
        is empty, or when ``force_rerun`` is set, ``_workflow_id`` derives an ID
        instead of returning this value.
    :type workflow_id: str
    :param project_id: The ID of the Google Cloud project the workflow belongs
        to. Optional in the signature (defaults to ``None``).
    :type project_id: str
    :param location: Required. The GCP region in which to handle the request.
    :type location: str
    :param retry: A retry object used to retry requests. If ``None`` is specified,
        requests will not be retried.
    :type retry: google.api_core.retry.Retry
    :param timeout: The amount of time, in seconds, to wait for the request to
        complete. Note that if ``retry`` is specified, the timeout applies to
        each individual attempt.
    :type timeout: float
    :param metadata: Additional metadata that is provided to the method.
    :type metadata: Sequence[Tuple[str, str]]
    :param gcp_conn_id: Connection ID stored on the operator (defaults to
        ``google_cloud_default``); presumably used to build the Workflows
        hook -- confirm against the rest of the class.
    :type gcp_conn_id: str
    :param force_rerun: When ``True``, ``_workflow_id`` ignores ``workflow_id``
        and derives the ID from a random UUID, so every call yields a new ID.
    :type force_rerun: bool
    :param impersonation_chain: Stored on the operator; presumably the service
        account(s) to impersonate when calling Google Cloud -- confirm against
        the rest of the class.
    :type impersonation_chain: Union[str, Sequence[str]]
    """

    def __init__(
        self,
        *,
        workflow: Dict,
        workflow_id: str,
        location: str,
        project_id: Optional[str] = None,
        retry: Optional[Retry] = None,
        timeout: Optional[float] = None,
        metadata: Optional[Sequence[Tuple[str, str]]] = None,
        gcp_conn_id: str = "google_cloud_default",
        force_rerun: bool = False,
        impersonation_chain: Optional[Union[str, Sequence[str]]] = None,
        **kwargs,
    ):
        super().__init__(**kwargs)

        # What to create and where.
        self.workflow = workflow
        self.workflow_id = workflow_id
        self.location = location
        self.project_id = project_id

        # Per-request call options.
        self.retry = retry
        self.timeout = timeout
        self.metadata = metadata

        # Connection settings and ID-generation behaviour.
        self.gcp_conn_id = gcp_conn_id
        self.impersonation_chain = impersonation_chain
        self.force_rerun = force_rerun

    def _workflow_id(self, context):
        """
        Return the workflow ID to use for this task run.

        :param context: Task context; ``context['execution_date']`` must
            provide an ``isoformat()`` method.
        :return: The user-supplied ``workflow_id`` when it is set and
            ``force_rerun`` is off; otherwise the MD5 hex digest of a string
            built from the DAG ID, task ID, execution date and either the
            workflow definition or a random UUID.
        """
        # A user-supplied ID is used verbatim; idempotency is then the
        # user's responsibility.
        if self.workflow_id and not self.force_rerun:
            return self.workflow_id

        # A random seed makes every forced rerun unique; otherwise the seed is
        # the canonical (key-sorted) JSON of the workflow definition.
        hash_base = (
            str(uuid.uuid4())
            if self.force_rerun
            else json.dumps(self.workflow, sort_keys=True)
        )

        # The allowed length of a workflow ID is limited, so the full
        # description is hashed instead of being used directly.
        exec_date = context['execution_date'].isoformat()
        fingerprint = f"airflow_{self.dag_id}_{self.task_id}_{exec_date}_{hash_base}"
        digest = hashlib.md5(fingerprint.encode()).hexdigest()

        # A hex digest contains only 0-9a-f, so this substitution leaves it
        # unchanged in practice.
        return re.sub(r"[:\-+.]", "_", digest)
class WorkflowsUpdateWorkflowOperator(BaseOperator):
    """
    Updates an existing workflow.

    Running this method has no impact on already running executions of the
    workflow. A new revision of the workflow may be created as a result of a
    successful update operation. In that case, such revision will be used in
    new workflow executions.

    .. seealso::
        For more information on how to use this operator, take a look at the guide:
        :ref:`howto/operator:WorkflowsUpdateWorkflowOperator`

    :param workflow_id: Required. The ID of the workflow to be updated.
    :type workflow_id: str
    :param location: Required. The GCP region in which to handle the request.
    :type location: str
    :param project_id: Required. The ID of the Google Cloud project the
        workflow belongs to.
    :type project_id: str
    :param update_mask: List of fields to be updated. If not present, the
        entire workflow will be updated.
    :type update_mask: FieldMask
    :param retry: A retry object used to retry requests. If ``None`` is specified,
        requests will not be retried.
    :type retry: google.api_core.retry.Retry
    :param timeout: The amount of time, in seconds, to wait for the request to
        complete. Note that if ``retry`` is specified, the timeout applies to
        each individual attempt.
    :type timeout: float
    :param metadata: Additional metadata that is provided to the method.
    :type metadata: Sequence[Tuple[str, str]]
    """
class WorkflowsDeleteWorkflowOperator(BaseOperator):
    """
    Deletes a workflow with the specified name.

    This method also cancels and deletes all running executions of the
    workflow.

    .. seealso::
        For more information on how to use this operator, take a look at the guide:
        :ref:`howto/operator:WorkflowsDeleteWorkflowOperator`

    :param workflow_id: Required. The ID of the workflow to be deleted.
    :type workflow_id: str
    :param project_id: Required. The ID of the Google Cloud project the
        workflow belongs to.
    :type project_id: str
    :param location: Required. The GCP region in which to handle the request.
    :type location: str
    :param retry: A retry object used to retry requests. If ``None`` is specified,
        requests will not be retried.
    :type retry: google.api_core.retry.Retry
    :param timeout: The amount of time, in seconds, to wait for the request to
        complete. Note that if ``retry`` is specified, the timeout applies to
        each individual attempt.
    :type timeout: float
    :param metadata: Additional metadata that is provided to the method.
    :type metadata: Sequence[Tuple[str, str]]
    """
class WorkflowsListWorkflowsOperator(BaseOperator):
    """
    Lists Workflows in a given project and location.

    The default order is not specified.

    .. seealso::
        For more information on how to use this operator, take a look at the guide:
        :ref:`howto/operator:WorkflowsListWorkflowsOperator`

    :param filter_: Filter to restrict results to specific workflows.
    :type filter_: str
    :param order_by: Comma-separated list of fields that specify the order of
        the results. Default sorting order for a field is ascending. To
        specify descending order for a field, append a "desc" suffix. If not
        specified, the results will be returned in an unspecified order.
    :type order_by: str
    :param project_id: Required. The ID of the Google Cloud project the
        workflows belong to.
    :type project_id: str
    :param location: Required. The GCP region in which to handle the request.
    :type location: str
    :param retry: A retry object used to retry requests. If ``None`` is specified,
        requests will not be retried.
    :type retry: google.api_core.retry.Retry
    :param timeout: The amount of time, in seconds, to wait for the request to
        complete. Note that if ``retry`` is specified, the timeout applies to
        each individual attempt.
    :type timeout: float
    :param metadata: Additional metadata that is provided to the method.
    :type metadata: Sequence[Tuple[str, str]]
    """
class WorkflowsGetWorkflowOperator(BaseOperator):
    """
    Gets details of a single Workflow.

    .. seealso::
        For more information on how to use this operator, take a look at the guide:
        :ref:`howto/operator:WorkflowsGetWorkflowOperator`

    :param workflow_id: Required. The ID of the workflow to be retrieved.
    :type workflow_id: str
    :param project_id: Required. The ID of the Google Cloud project the
        workflow belongs to.
    :type project_id: str
    :param location: Required. The GCP region in which to handle the request.
    :type location: str
    :param retry: A retry object used to retry requests. If ``None`` is specified,
        requests will not be retried.
    :type retry: google.api_core.retry.Retry
    :param timeout: The amount of time, in seconds, to wait for the request to
        complete. Note that if ``retry`` is specified, the timeout applies to
        each individual attempt.
    :type timeout: float
    :param metadata: Additional metadata that is provided to the method.
    :type metadata: Sequence[Tuple[str, str]]
    """
class WorkflowsCreateExecutionOperator(BaseOperator):
    """
    Creates a new execution using the latest revision of the given workflow.

    .. seealso::
        For more information on how to use this operator, take a look at the guide:
        :ref:`howto/operator:WorkflowsCreateExecutionOperator`

    :param execution: Required. Execution to be created.
    :type execution: Dict
    :param workflow_id: Required. The ID of the workflow.
    :type workflow_id: str
    :param project_id: Required. The ID of the Google Cloud project the
        workflow belongs to.
    :type project_id: str
    :param location: Required. The GCP region in which to handle the request.
    :type location: str
    :param retry: A retry object used to retry requests. If ``None`` is specified,
        requests will not be retried.
    :type retry: google.api_core.retry.Retry
    :param timeout: The amount of time, in seconds, to wait for the request to
        complete. Note that if ``retry`` is specified, the timeout applies to
        each individual attempt.
    :type timeout: float
    :param metadata: Additional metadata that is provided to the method.
    :type metadata: Sequence[Tuple[str, str]]
    """
class WorkflowsCancelExecutionOperator(BaseOperator):
    """
    Cancels an execution using the given ``workflow_id`` and ``execution_id``.

    .. seealso::
        For more information on how to use this operator, take a look at the guide:
        :ref:`howto/operator:WorkflowsCancelExecutionOperator`

    :param workflow_id: Required. The ID of the workflow.
    :type workflow_id: str
    :param execution_id: Required. The ID of the execution.
    :type execution_id: str
    :param project_id: Required. The ID of the Google Cloud project the
        workflow belongs to.
    :type project_id: str
    :param location: Required. The GCP region in which to handle the request.
    :type location: str
    :param retry: A retry object used to retry requests. If ``None`` is specified,
        requests will not be retried.
    :type retry: google.api_core.retry.Retry
    :param timeout: The amount of time, in seconds, to wait for the request to
        complete. Note that if ``retry`` is specified, the timeout applies to
        each individual attempt.
    :type timeout: float
    :param metadata: Additional metadata that is provided to the method.
    :type metadata: Sequence[Tuple[str, str]]
    """
class WorkflowsListExecutionsOperator(BaseOperator):
    """
    Returns a list of executions which belong to the workflow with the given name.

    The method returns executions of all workflow revisions. Returned
    executions are ordered by their start time (newest first).

    .. seealso::
        For more information on how to use this operator, take a look at the guide:
        :ref:`howto/operator:WorkflowsListExecutionsOperator`

    :param workflow_id: Required. The ID of the workflow whose executions are
        listed.
    :type workflow_id: str
    :param start_date_filter: Only executions whose start time is later than
        this date are returned. By default operators return executions from
        last 60 minutes.
    :type start_date_filter: datetime
    :param project_id: Required. The ID of the Google Cloud project the
        workflow belongs to.
    :type project_id: str
    :param location: Required. The GCP region in which to handle the request.
    :type location: str
    :param retry: A retry object used to retry requests. If ``None`` is specified,
        requests will not be retried.
    :type retry: google.api_core.retry.Retry
    :param timeout: The amount of time, in seconds, to wait for the request to
        complete. Note that if ``retry`` is specified, the timeout applies to
        each individual attempt.
    :type timeout: float
    :param metadata: Additional metadata that is provided to the method.
    :type metadata: Sequence[Tuple[str, str]]
    """

    def execute(self, context):
        """
        List the workflow's executions and return the recently started ones.

        :param context: Task context; not used by this method.
        :return: One ``Execution.to_dict`` result per execution whose
            ``start_time`` is greater than ``self.start_date_filter``.
        """
        workflows_hook = WorkflowsHook(
            gcp_conn_id=self.gcp_conn_id,
            impersonation_chain=self.impersonation_chain,
        )
        self.log.info("Retrieving executions for workflow %s", self.workflow_id)

        request_args = {
            "workflow_id": self.workflow_id,
            "location": self.location,
            "project_id": self.project_id,
            "retry": self.retry,
            "timeout": self.timeout,
            "metadata": self.metadata,
        }
        all_executions = workflows_hook.list_executions(**request_args)

        # Filtering happens here, on the results the hook hands back; only
        # executions that started after the cut-off are kept.
        started_after_cutoff = (
            execution
            for execution in all_executions
            if execution.start_time > self.start_date_filter
        )
        return [Execution.to_dict(execution) for execution in started_after_cutoff]
class WorkflowsGetExecutionOperator(BaseOperator):
    """
    Returns an execution for the given ``workflow_id`` and ``execution_id``.

    .. seealso::
        For more information on how to use this operator, take a look at the guide:
        :ref:`howto/operator:WorkflowsGetExecutionOperator`

    :param workflow_id: Required. The ID of the workflow.
    :type workflow_id: str
    :param execution_id: Required. The ID of the execution.
    :type execution_id: str
    :param project_id: Required. The ID of the Google Cloud project the
        workflow belongs to.
    :type project_id: str
    :param location: Required. The GCP region in which to handle the request.
    :type location: str
    :param retry: A retry object used to retry requests. If ``None`` is specified,
        requests will not be retried.
    :type retry: google.api_core.retry.Retry
    :param timeout: The amount of time, in seconds, to wait for the request to
        complete. Note that if ``retry`` is specified, the timeout applies to
        each individual attempt.
    :type timeout: float
    :param metadata: Additional metadata that is provided to the method.
    :type metadata: Sequence[Tuple[str, str]]
    """