# Source code for tests.system.providers.apache.beam.utils

#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
"""
Example utilities for the Apache Beam operator example DAGs.
"""

from __future__ import annotations

import os
from datetime import datetime
from urllib.parse import urlsplit

from airflow.utils.trigger_rule import TriggerRule

# Every location below can be overridden through the named environment variable. The fallbacks
# point at a placeholder bucket literally called "INVALID BUCKET NAME" (presumably so that a
# missing override is easy to spot -- confirm with the test owners).
GCP_PROJECT_ID = os.environ.get("GCP_PROJECT_ID", "example-project")

# GCS_INPUT historically read only APACHE_BEAM_PYTHON, the same variable as GCS_PYTHON, so setting
# it pointed the pipeline input at the Python source file. APACHE_BEAM_GCS_INPUT (named like the
# sibling APACHE_BEAM_GCS_* variables) now takes precedence; APACHE_BEAM_PYTHON is still honoured
# as a fallback so existing environments resolve to the same value as before.
GCS_INPUT = os.environ.get(
    "APACHE_BEAM_GCS_INPUT",
    os.environ.get("APACHE_BEAM_PYTHON", "gs://INVALID BUCKET NAME/shakespeare/kinglear.txt"),
)
GCS_TMP = os.environ.get("APACHE_BEAM_GCS_TMP", "gs://INVALID BUCKET NAME/temp/")
GCS_STAGING = os.environ.get("APACHE_BEAM_GCS_STAGING", "gs://INVALID BUCKET NAME/staging/")
GCS_OUTPUT = os.environ.get("APACHE_BEAM_GCS_OUTPUT", "gs://INVALID BUCKET NAME/output")

# Pipeline sources (Python and Go), each with a separate "DATAFLOW_ASYNC" variant.
GCS_PYTHON = os.environ.get("APACHE_BEAM_PYTHON", "gs://INVALID BUCKET NAME/wordcount_debugging.py")
GCS_PYTHON_DATAFLOW_ASYNC = os.environ.get(
    "APACHE_BEAM_PYTHON_DATAFLOW_ASYNC", "gs://INVALID BUCKET NAME/wordcount_debugging.py"
)
GCS_GO = os.environ.get("APACHE_BEAM_GO", "gs://INVALID BUCKET NAME/wordcount_debugging.go")
GCS_GO_DATAFLOW_ASYNC = os.environ.get(
    "APACHE_BEAM_GO_DATAFLOW_ASYNC", "gs://INVALID BUCKET NAME/wordcount_debugging.go"
)

# Bundled pipeline JARs, one per runner.
GCS_JAR_DIRECT_RUNNER = os.environ.get(
    "APACHE_BEAM_DIRECT_RUNNER_JAR",
    "gs://INVALID BUCKET NAME/tests/dataflow-templates-bundled-java=11-beam-v2.25.0-DirectRunner.jar",
)
GCS_JAR_DATAFLOW_RUNNER = os.environ.get(
    "APACHE_BEAM_DATAFLOW_RUNNER_JAR", "gs://INVALID BUCKET NAME/word-count-beam-bundled-0.1.jar"
)
GCS_JAR_SPARK_RUNNER = os.environ.get(
    "APACHE_BEAM_SPARK_RUNNER_JAR",
    "gs://INVALID BUCKET NAME/tests/dataflow-templates-bundled-java=11-beam-v2.25.0-SparkRunner.jar",
)

# Each JAR URL is split once. The bucket name is the URL's network location and the object name
# is the URL path with its first character (the leading "/") sliced off.
GCS_JAR_DIRECT_RUNNER_PARTS = urlsplit(GCS_JAR_DIRECT_RUNNER)
GCS_JAR_DIRECT_RUNNER_BUCKET_NAME = GCS_JAR_DIRECT_RUNNER_PARTS.netloc
GCS_JAR_DIRECT_RUNNER_OBJECT_NAME = GCS_JAR_DIRECT_RUNNER_PARTS.path[1:]

GCS_JAR_DATAFLOW_RUNNER_PARTS = urlsplit(GCS_JAR_DATAFLOW_RUNNER)
GCS_JAR_DATAFLOW_RUNNER_BUCKET_NAME = GCS_JAR_DATAFLOW_RUNNER_PARTS.netloc
GCS_JAR_DATAFLOW_RUNNER_OBJECT_NAME = GCS_JAR_DATAFLOW_RUNNER_PARTS.path[1:]

GCS_JAR_SPARK_RUNNER_PARTS = urlsplit(GCS_JAR_SPARK_RUNNER)
GCS_JAR_SPARK_RUNNER_BUCKET_NAME = GCS_JAR_SPARK_RUNNER_PARTS.netloc
GCS_JAR_SPARK_RUNNER_OBJECT_NAME = GCS_JAR_SPARK_RUNNER_PARTS.path[1:]
# Shared argument mapping: a default pipeline "output" path under /tmp and the ALL_DONE trigger
# rule. NOTE(review): presumably passed as ``default_args`` to the example DAGs -- verify against
# the callers.
DEFAULT_ARGS = {
    "default_pipeline_options": {"output": "/tmp/example_beam"},
    "trigger_rule": TriggerRule.ALL_DONE,
}

# Naive datetime (no tzinfo attached).
START_DATE = datetime(2021, 1, 1)

# Was this entry helpful?