## Licensed to the Apache Software Foundation (ASF) under one# or more contributor license agreements. See the NOTICE file# distributed with this work for additional information# regarding copyright ownership. The ASF licenses this file# to you under the Apache License, Version 2.0 (the# "License"); you may not use this file except in compliance# with the License. You may obtain a copy of the License at## Unless required by applicable law or agreed to in writing,# software distributed under the License is distributed on an# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY# KIND, either express or implied. See the License for the# specific language governing permissions and limitations# under the License.importrefromtypingimportCallable,List,Optional,
class S3KeySensor(BaseSensorOperator):
    """
    Waits for a key (a file-like instance on S3) to be present in a S3 bucket.

    S3 being a key/value store, it does not support folders. The path is just
    a key to a resource.

    :param bucket_key: The key being waited on. Supports full s3:// style url
        or relative path from root level. When it's specified as a full s3://
        url, please leave bucket_name as `None`.
    :type bucket_key: str
    :param bucket_name: Name of the S3 bucket. Only needed when ``bucket_key``
        is not provided as a full s3:// url.
    :type bucket_name: str
    :param wildcard_match: whether the bucket_key should be interpreted as a
        Unix wildcard pattern
    :type wildcard_match: bool
    :param aws_conn_id: a reference to the s3 connection
    :type aws_conn_id: str
    :param verify: Whether or not to verify SSL certificates for S3 connection.
        By default SSL certificates are verified.
        You can provide the following values:

        - ``False``: do not validate SSL certificates. SSL will still be used
                 (unless use_ssl is False), but SSL certificates will not be
                 verified.
        - ``path/to/cert/bundle.pem``: A filename of the CA cert bundle to use.
                 You can specify this argument if you want to use a different
                 CA cert bundle than the one used by botocore.
    :type verify: bool or str
    """

    # NOTE(review): no constructor is visible in this copy of the file. The
    # methods below read ``bucket_key``, ``bucket_name``, ``wildcard_match``,
    # ``aws_conn_id``, ``verify`` and ``hook`` from the instance, so they are
    # presumably initialised elsewhere -- confirm against the full source.

    def poke(self, context):
        """
        Check once whether the awaited key currently exists.

        When ``bucket_name`` is ``None`` the bucket is taken from the network
        location of ``bucket_key`` and the key from its path; both attributes
        are overwritten with the parsed values, so later calls take the
        "relative key" branch. When ``bucket_name`` is set, ``bucket_key`` must
        not carry a scheme or a network location.

        :param context: not used by this method.
        :return: the result of the hook's ``check_for_wildcard_key`` when
            ``wildcard_match`` is truthy, otherwise of ``check_for_key``.
        :raises AirflowException: if the bucket cannot be determined, or if a
            full url is combined with an explicit ``bucket_name``.
        """
        parsed_url = urlparse(self.bucket_key)

        if self.bucket_name is None:
            if parsed_url.netloc == '':
                raise AirflowException('If key is a relative path from root, please provide a bucket_name')
            self.bucket_name = parsed_url.netloc
            self.bucket_key = parsed_url.path.lstrip('/')
        elif parsed_url.scheme != '' or parsed_url.netloc != '':
            raise AirflowException(
                'If bucket_name is provided, bucket_key'
                ' should be relative path from root'
                ' level, rather than a full s3:// url'
            )

        # The head of this call was missing in this copy of the file (only the
        # arguments and the closing parenthesis were left).
        # NOTE(review): assumes the base class exposes a logger as ``self.log``
        # -- confirm.
        self.log.info('Poking for key : s3://%s/%s', self.bucket_name, self.bucket_key)

        hook = self.get_hook()
        if self.wildcard_match:
            return hook.check_for_wildcard_key(self.bucket_key, self.bucket_name)
        return hook.check_for_key(self.bucket_key, self.bucket_name)

    def get_hook(self) -> S3Hook:
        """
        Create and return an S3Hook.

        The hook is built from ``aws_conn_id`` and ``verify`` the first time it
        is needed and stored on ``self.hook``; later calls return the stored
        instance.
        """
        if not self.hook:
            self.hook = S3Hook(aws_conn_id=self.aws_conn_id, verify=self.verify)
        return self.hook
class S3KeySizeSensor(S3KeySensor):
    """
    Waits for a key (a file-like instance on S3) to be present and be more than
    some size in a S3 bucket.

    S3 being a key/value store, it does not support folders. The path is just
    a key to a resource.

    :param bucket_key: The key being waited on. Supports full s3:// style url
        or relative path from root level. When it's specified as a full s3://
        url, please leave bucket_name as `None`.
    :type bucket_key: str
    :param bucket_name: Name of the S3 bucket. Only needed when ``bucket_key``
        is not provided as a full s3:// url.
    :type bucket_name: str
    :param wildcard_match: whether the bucket_key should be interpreted as a
        Unix wildcard pattern
    :type wildcard_match: bool
    :param aws_conn_id: a reference to the s3 connection
    :type aws_conn_id: str
    :param verify: Whether or not to verify SSL certificates for S3 connection.
        By default SSL certificates are verified.
        You can provide the following values:

        - ``False``: do not validate SSL certificates. SSL will still be used
                 (unless use_ssl is False), but SSL certificates will not be
                 verified.
        - ``path/to/cert/bundle.pem``: A filename of the CA cert bundle to use.
                 You can specify this argument if you want to use a different
                 CA cert bundle than the one used by botocore.
    :type verify: bool or str
    :param check_fn: Function that receives the list of the S3 objects,
        and returns the boolean:

        - ``True``: a certain criteria is met
        - ``False``: the criteria isn't met

        **Example**: Wait for any S3 object size more than 1 megabyte  ::

            def check_fn(self, data: List) -> bool:
                return any(f.get('Size', 0) > 1048576 for f in data if isinstance(f, dict))
    :type check_fn: Optional[Callable[..., bool]]
    """

    def __init__(
        self,
        *,
        check_fn: Optional[Callable[..., bool]] = None,
        **kwargs,
    ):
        super().__init__(**kwargs)
        # Stored under a different name so the instance attribute does not
        # shadow the ``check_fn`` method defined on this class.
        self.check_fn_user = check_fn

    def get_files(self, s3_hook: S3Hook, delimiter: Optional[str] = '/') -> List:
        """
        Gets a list of files in the bucket.

        Objects are listed by prefix with the ``list_objects_v2`` paginator.
        Without ``wildcard_match`` the prefix is ``bucket_key`` itself; with it,
        the prefix is the part of ``bucket_key`` before the first wildcard
        character (``*``, ``?`` or ``[``).

        NOTE(review): the listing is by prefix only -- the returned entries are
        not matched against the full wildcard pattern in this method.

        :param s3_hook: hook whose ``get_conn()`` provides the paginator.
        :type s3_hook: S3Hook
        :param delimiter: passed as ``Delimiter`` to the listing call; left out
            of the request when ``None``.
        :type delimiter: Optional[str]
        :return: the ``Contents`` entries, across all pages, whose ``Size`` is
            an int or a float.
        :rtype: list
        """
        prefix = self.bucket_key
        if self.wildcard_match:
            # ``maxsplit`` is passed by keyword: the positional form is
            # deprecated since Python 3.13.
            prefix = re.split(r'[\[\*\?]', self.bucket_key, maxsplit=1)[0]

        list_kwargs = {
            'Bucket': self.bucket_name,
            'Prefix': prefix,
            'PaginationConfig': {
                'PageSize': None,
                'MaxItems': None,
            },
        }
        if delimiter is not None:
            list_kwargs['Delimiter'] = delimiter

        paginator = s3_hook.get_conn().get_paginator('list_objects_v2')

        keys = []
        for page in paginator.paginate(**list_kwargs):
            if 'Contents' not in page:
                continue
            keys.extend(
                entry for entry in page['Contents'] if isinstance(entry.get('Size', None), (int, float))
            )
        return keys

    def check_fn(self, data: List, object_min_size: Optional[Union[int, float]] = 0) -> bool:
        """
        Default function for checking that S3 Objects have size more than 0

        Entries of ``data`` that are not dicts are ignored, and a dict without
        a ``Size`` key counts as size 0. The result is ``True`` when no dict
        entries are present.

        :param data: List of the objects in S3 bucket.
        :type data: list
        :param object_min_size: Checks if the objects sizes are greater than
            this value. ``None`` is treated as 0.
        :type object_min_size: int
        :return: ``True`` if every dict entry is larger than the threshold.
        :rtype: bool
        """
        threshold = 0 if object_min_size is None else object_min_size
        return all(f.get('Size', 0) > threshold for f in data if isinstance(f, dict))