Source code for airflow.providers.apache.hdfs.sensors.web_hdfs
## Licensed to the Apache Software Foundation (ASF) under one# or more contributor license agreements. See the NOTICE file# distributed with this work for additional information# regarding copyright ownership. The ASF licenses this file# to you under the Apache License, Version 2.0 (the# "License"); you may not use this file except in compliance# with the License. You may obtain a copy of the License at## http://www.apache.org/licenses/LICENSE-2.0## Unless required by applicable law or agreed to in writing,# software distributed under the License is distributed on an# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY# KIND, either express or implied. See the License for the# specific language governing permissions and limitations# under the License.from__future__importannotationsfromcollections.abcimportSequencefromtypingimportTYPE_CHECKING,Anyfromairflow.sensors.baseimportBaseSensorOperatorifTYPE_CHECKING:fromhdfsimportInsecureClientfromhdfs.ext.kerberosimportKerberosClientfromairflow.utils.contextimportContext
class WebHdfsSensor(BaseSensorOperator):
    """
    Sensor that waits for a file or folder to land in HDFS.

    ``poke`` reads ``self.filepath`` and ``self.webhdfs_conn_id``; both are
    expected to be assigned elsewhere (their initialisation is not visible
    in this excerpt).
    """

    def poke(self, context: Context) -> bool:
        """
        Check whether ``self.filepath`` currently exists in HDFS.

        :param context: task execution context; not used by this check.
        :return: whatever ``WebHDFSHook.check_for_path`` reports for the path.
        """
        # The hook is imported at call time, inside the method.
        from airflow.providers.apache.hdfs.hooks.webhdfs import WebHDFSHook

        webhdfs = WebHDFSHook(self.webhdfs_conn_id)
        self.log.info("Poking for file %s", self.filepath)
        path_found = webhdfs.check_for_path(hdfs_path=self.filepath)
        return path_found
class MultipleFilesWebHdfsSensor(BaseSensorOperator):
    """
    Sensor that waits for multiple files in a folder to land in HDFS.

    ``poke`` reads ``self.directory_path``, ``self.expected_filenames`` and
    ``self.webhdfs_conn_id``; these are expected to be assigned elsewhere
    (their initialisation is not visible in this excerpt).
    """

    def poke(self, context: Context) -> bool:
        """
        Compare the directory listing against the expected file names.

        :param context: task execution context; not used by this check.
        :return: ``True`` when every entry of ``self.expected_filenames``
            appears in the listing of ``self.directory_path``; ``False``
            otherwise.
        """
        # The hook is imported at call time, inside the method.
        from airflow.providers.apache.hdfs.hooks.webhdfs import WebHDFSHook

        webhdfs = WebHDFSHook(self.webhdfs_conn_id)
        client: KerberosClient | InsecureClient = webhdfs.get_conn()

        # NOTE(review): presumably ``client.list`` yields bare entry names
        # (not full paths) so they compare equal to the expected names -- confirm.
        present = set(client.list(self.directory_path))
        self.log.debug("Files Found in directory: %s", present)

        absent = set(self.expected_filenames).difference(present)
        if not absent:
            return True

        self.log.info("There are missing files: %s", absent)
        return False