Source code for airflow.providers.amazon.aws.links.emr
# Licensed to the Apache Software Foundation (ASF) under one# or more contributor license agreements. See the NOTICE file# distributed with this work for additional information# regarding copyright ownership. The ASF licenses this file# to you under the Apache License, Version 2.0 (the# "License"); you may not use this file except in compliance# with the License. You may obtain a copy of the License at## http://www.apache.org/licenses/LICENSE-2.0## Unless required by applicable law or agreed to in writing,# software distributed under the License is distributed on an# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY# KIND, either express or implied. See the License for the# specific language governing permissions and limitations# under the License.from__future__importannotationsfromtypingimportTYPE_CHECKING,Anyfromurllib.parseimportParseResult,quote_plus,urlparsefromairflow.exceptionsimportAirflowExceptionfromairflow.providers.amazon.aws.hooks.emrimportEmrServerlessHookfromairflow.providers.amazon.aws.hooks.s3importS3Hookfromairflow.providers.amazon.aws.links.base_awsimportBASE_AWS_CONSOLE_LINK,BaseAwsLinkfromairflow.utils.helpersimportexactly_oneifTYPE_CHECKING:importboto3
class EmrClusterLink(BaseAwsLink):
    """Link helper that constructs the Amazon EMR Cluster link."""
def get_serverless_log_uri(*, s3_log_uri: str, application_id: str, job_run_id: str) -> str:
    """
    Retrieve the S3 URI to EMR Serverless Job logs.

    Any EMR Serverless job may have a different S3 logging location (or none), which is an S3 URI.
    The logging location is then {s3_uri}/applications/{application_id}/jobs/{job_run_id}.

    Trailing slashes on ``s3_log_uri`` are dropped before the path is appended, so
    ``s3://bucket/logs`` and ``s3://bucket/logs/`` produce the same result.

    :param s3_log_uri: Base S3 URI of the job's logging location.
    :param application_id: ID of the EMR Serverless application.
    :param job_run_id: ID of the job run.
    :return: The S3 URI composed of the base URI, the application ID and the job run ID.
    """
    # Avoid an empty path segment ("//applications") when the base URI ends with "/".
    base_uri = s3_log_uri.rstrip("/")
    return f"{base_uri}/applications/{application_id}/jobs/{job_run_id}"
def get_serverless_dashboard_url(
    *,
    aws_conn_id: str | None = None,
    emr_serverless_client: boto3.client = None,
    application_id: str,
    job_run_id: str,
) -> ParseResult | None:
    """
    Fetch the URL of the EMR Serverless dashboard for a job run.

    The URL is a one-use, ephemeral link that expires in 1 hour and is accessible without
    authentication.

    Exactly one of ``aws_conn_id`` and ``emr_serverless_client`` must be supplied. When the
    connection ID is given, a client is created from that connection.

    :param aws_conn_id: AWS connection ID used to create an EMR Serverless client.
    :param emr_serverless_client: Existing EMR Serverless client to use instead of a connection ID.
    :param application_id: ID of the EMR Serverless application.
    :param job_run_id: ID of the job run.
    :return: The parsed dashboard URL, or ``None`` when the response contains no ``"url"`` key.
    :raises AirflowException: If not exactly one of ``aws_conn_id`` and
        ``emr_serverless_client`` is provided.
    """
    if not exactly_one(aws_conn_id, emr_serverless_client):
        raise AirflowException("Requires either an AWS connection ID or an EMR Serverless Client.")

    client = emr_serverless_client
    if aws_conn_id:
        # Give up after a single attempt if get_dashboard_for_job_run fails, so that
        # the rest of the links load in a reasonable time frame.
        single_attempt = {"retries": {"total_max_attempts": 1}}
        client = EmrServerlessHook(aws_conn_id=aws_conn_id, config=single_attempt).conn

    dashboard = client.get_dashboard_for_job_run(applicationId=application_id, jobRunId=job_run_id)
    if "url" in dashboard:
        return urlparse(dashboard["url"])
    return None
def get_log_uri(
    *,
    cluster: dict[str, Any] | None = None,
    emr_client: boto3.client = None,
    job_flow_id: str | None = None,
) -> str | None:
    """
    Look up the S3 URI of the EMR Job logs.

    Requires either the output of a describe_cluster call or both an EMR Client and a
    job_flow_id. When ``cluster`` is not given, the description is fetched with
    ``emr_client.describe_cluster(ClusterId=job_flow_id)``.

    :param cluster: Output of a describe_cluster call.
    :param emr_client: EMR client used to describe the cluster when ``cluster`` is not given.
    :param job_flow_id: Cluster ID passed to ``describe_cluster``.
    :return: The parts returned by ``S3Hook.parse_s3_url`` for the cluster's ``LogUri`` joined
        with ``"/"``, or ``None`` when the cluster description has no ``"LogUri"`` key.
    :raises AirflowException: If not exactly one of the two input options is provided.
    """
    has_description = bool(cluster)
    can_describe = emr_client and job_flow_id
    if not exactly_one(has_description, can_describe):
        raise AirflowException(
            "Requires either the output of a describe_cluster call or both an EMR Client and a job_flow_id."
        )

    description = cluster if cluster else emr_client.describe_cluster(ClusterId=job_flow_id)
    details = description["Cluster"]
    if "LogUri" in details:
        return "/".join(S3Hook.parse_s3_url(details["LogUri"]))
    return None
class EmrServerlessLogsLink(BaseAwsLink):
    """Link helper that constructs the Amazon EMR Serverless link to the Spark stdout logs."""
class EmrServerlessCloudWatchLogsLink(BaseAwsLink):
    """
    Link helper that points to the CloudWatch console for Amazon EMR Serverless Logs.

    The constructed deep link filters on a specific job run.
    """