Source code for airflow.providers.amazon.aws.sensors.s3_key

#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
from typing import Optional, Union
from urllib.parse import urlparse

from airflow.exceptions import AirflowException
from airflow.providers.amazon.aws.hooks.s3 import S3Hook
from airflow.sensors.base import BaseSensorOperator
from airflow.utils.decorators import apply_defaults


[docs]class S3KeySensor(BaseSensorOperator): """ Waits for a key (a file-like instance on S3) to be present in a S3 bucket. S3 being a key/value it does not support folders. The path is just a key a resource. :param bucket_key: The key being waited on. Supports full s3:// style url or relative path from root level. When it's specified as a full s3:// url, please leave bucket_name as `None`. :type bucket_key: str :param bucket_name: Name of the S3 bucket. Only needed when ``bucket_key`` is not provided as a full s3:// url. :type bucket_name: str :param wildcard_match: whether the bucket_key should be interpreted as a Unix wildcard pattern :type wildcard_match: bool :param aws_conn_id: a reference to the s3 connection :type aws_conn_id: str :param verify: Whether or not to verify SSL certificates for S3 connection. By default SSL certificates are verified. You can provide the following values: - ``False``: do not validate SSL certificates. SSL will still be used (unless use_ssl is False), but SSL certificates will not be verified. - ``path/to/cert/bundle.pem``: A filename of the CA cert bundle to uses. You can specify this argument if you want to use a different CA cert bundle than the one used by botocore. :type verify: bool or str """
[docs] template_fields = ('bucket_key', 'bucket_name')
@apply_defaults def __init__( self, *, bucket_key: str, bucket_name: Optional[str] = None, wildcard_match: bool = False, aws_conn_id: str = 'aws_default', verify: Optional[Union[str, bool]] = None, **kwargs, ): super().__init__(**kwargs) self.bucket_name = bucket_name self.bucket_key = bucket_key self.wildcard_match = wildcard_match self.aws_conn_id = aws_conn_id self.verify = verify self.hook: Optional[S3Hook] = None
[docs] def poke(self, context): if self.bucket_name is None: parsed_url = urlparse(self.bucket_key) if parsed_url.netloc == '': raise AirflowException('If key is a relative path from root, please provide a bucket_name') self.bucket_name = parsed_url.netloc self.bucket_key = parsed_url.path.lstrip('/') else: parsed_url = urlparse(self.bucket_key) if parsed_url.scheme != '' or parsed_url.netloc != '': raise AirflowException( 'If bucket_name is provided, bucket_key' + ' should be relative path from root' + ' level, rather than a full s3:// url' ) self.log.info('Poking for key : s3://%s/%s', self.bucket_name, self.bucket_key) if self.wildcard_match: return self.get_hook().check_for_wildcard_key(self.bucket_key, self.bucket_name) return self.get_hook().check_for_key(self.bucket_key, self.bucket_name)
[docs] def get_hook(self) -> S3Hook: """Create and return an S3Hook""" if self.hook: return self.hook self.hook = S3Hook(aws_conn_id=self.aws_conn_id, verify=self.verify) return self.hook

Was this entry helpful?