Source code for tests.system.providers.amazon.aws.example_emr
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import annotations

import json
from datetime import datetime

import boto3

from airflow import DAG
from airflow.decorators import task
from airflow.models.baseoperator import chain
from airflow.providers.amazon.aws.hooks.ssm import SsmHook
from airflow.providers.amazon.aws.operators.emr import (
    EmrAddStepsOperator,
    EmrCreateJobFlowOperator,
    EmrModifyClusterOperator,
    EmrTerminateJobFlowOperator,
)
from airflow.providers.amazon.aws.operators.s3 import S3CreateBucketOperator, S3DeleteBucketOperator
from airflow.providers.amazon.aws.sensors.emr import EmrJobFlowSensor, EmrStepSensor
from airflow.utils.trigger_rule import TriggerRule
from tests.system.providers.amazon.aws.utils import ENV_ID_KEY, SystemTestContextBuilder
# EMR security-configuration settings used by this example.
SECURITY_CONFIGURATION = {
    "AuthorizationConfiguration": {
        "IAMConfiguration": {
            "EnableApplicationScopedIAMRole": True,
        },
    },
    # Use IMDSv2 for greater security, see the following doc for more details:
    # https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-create-security-configuration.html
    "InstanceMetadataServiceConfiguration": {
        "MinimumInstanceMetadataServiceVersion": 2,
        "HttpPutResponseHopLimit": 2,
    },
}
def get_ami_id():
    """
    Return an AL2 AMI compatible with EMR.

    Looks up the ``/aws/service/ami-amazon-linux-latest/amzn2-ami-hvm-x86_64-ebs``
    parameter through an ``SsmHook`` created with ``aws_conn_id=None``.

    :return: whatever ``SsmHook.get_parameter_value`` returns for that parameter.
    """
    return SsmHook(aws_conn_id=None).get_parameter_value(
        "/aws/service/ami-amazon-linux-latest/amzn2-ami-hvm-x86_64-ebs"
    )
# NOTE(review): `test_context` and `dag` are not defined in the visible part of
# this file; these statements presumably belong inside the
# `with DAG(...) as dag:` block -- confirm against the full module.
env_id = test_context[ENV_ID_KEY]
config_name = f"{CONFIG_NAME}-{env_id}"
execution_role_arn = test_context[EXECUTION_ROLE_ARN_KEY]
s3_bucket = f"{env_id}-emr-bucket"

# Fill in the per-run values of the job flow definition before it is handed
# to the create operator.
JOB_FLOW_OVERRIDES["LogUri"] = f"s3://{s3_bucket}/"
JOB_FLOW_OVERRIDES["SecurityConfiguration"] = config_name
JOB_FLOW_OVERRIDES["Instances"]["InstanceGroups"][0]["CustomAmiId"] = get_ami_id()

create_s3_bucket = S3CreateBucketOperator(task_id="create_s3_bucket", bucket_name=s3_bucket)

create_security_configuration = configure_security_config(config_name)

# [START howto_operator_emr_create_job_flow]
create_job_flow = EmrCreateJobFlowOperator(
    task_id="create_job_flow",
    job_flow_overrides=JOB_FLOW_OVERRIDES,
)
# [END howto_operator_emr_create_job_flow]

# [START howto_operator_emr_modify_cluster]
modify_cluster = EmrModifyClusterOperator(
    task_id="modify_cluster", cluster_id=create_job_flow.output, step_concurrency_level=1
)
# [END howto_operator_emr_modify_cluster]

# [START howto_operator_emr_add_steps]
add_steps = EmrAddStepsOperator(
    task_id="add_steps",
    job_flow_id=create_job_flow.output,
    steps=SPARK_STEPS,
    execution_role_arn=execution_role_arn,
)
# [END howto_operator_emr_add_steps]
add_steps.wait_for_completion = True
# On rare occasion (1 in 50ish?) this system test times out.  Extending the
# max_attempts from the default 60 to attempt to mitigate the flaky test.
add_steps.waiter_max_attempts = 90

# [START howto_sensor_emr_step]
wait_for_step = EmrStepSensor(
    task_id="wait_for_step",
    job_flow_id=create_job_flow.output,
    step_id=get_step_id(add_steps.output),
)
# [END howto_sensor_emr_step]

# [START howto_operator_emr_terminate_job_flow]
remove_cluster = EmrTerminateJobFlowOperator(
    task_id="remove_cluster",
    job_flow_id=create_job_flow.output,
)
# [END howto_operator_emr_terminate_job_flow]
# Set outside the documented snippet: tear the cluster down once all upstream
# tasks are done, whatever their outcome.
remove_cluster.trigger_rule = TriggerRule.ALL_DONE

# [START howto_sensor_emr_job_flow]
check_job_flow = EmrJobFlowSensor(task_id="check_job_flow", job_flow_id=create_job_flow.output)
# [END howto_sensor_emr_job_flow]
check_job_flow.poke_interval = 10

delete_security_configuration = delete_security_config(config_name)

delete_s3_bucket = S3DeleteBucketOperator(
    task_id="delete_s3_bucket",
    bucket_name=s3_bucket,
    force_delete=True,
    trigger_rule=TriggerRule.ALL_DONE,
)

chain(
    # TEST SETUP
    test_context,
    create_s3_bucket,
    create_security_configuration,
    # TEST BODY
    create_job_flow,
    modify_cluster,
    add_steps,
    wait_for_step,
    # TEST TEARDOWN
    remove_cluster,
    check_job_flow,
    delete_security_configuration,
    delete_s3_bucket,
)

from tests.system.utils.watcher import watcher

# This test needs watcher in order to properly mark success/failure
# when "tearDown" task with trigger rule is part of the DAG
list(dag.tasks) >> watcher()

from tests.system.utils import get_test_run  # noqa: E402

# Needed to run the example DAG with pytest (see: tests/system/README.md#run_via_pytest)