Source code for airflow.providers.http.hooks.http

#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import annotations

from typing import Any, Callable

import requests
import tenacity
from requests.auth import HTTPBasicAuth
from requests_toolbelt.adapters.socket_options import TCPKeepAliveAdapter

from airflow.exceptions import AirflowException
from airflow.hooks.base import BaseHook


[docs]class HttpHook(BaseHook): """ Interact with HTTP servers. :param method: the API method to be called :param http_conn_id: :ref:`http connection<howto/connection:http>` that has the base API url i.e https://www.google.com/ and optional authentication credentials. Default headers can also be specified in the Extra field in json format. :param auth_type: The auth type for the service :param tcp_keep_alive: Enable TCP Keep Alive for the connection. :param tcp_keep_alive_idle: The TCP Keep Alive Idle parameter (corresponds to ``socket.TCP_KEEPIDLE``). :param tcp_keep_alive_count: The TCP Keep Alive count parameter (corresponds to ``socket.TCP_KEEPCNT``) :param tcp_keep_alive_interval: The TCP Keep Alive interval parameter (corresponds to ``socket.TCP_KEEPINTVL``) """
[docs] conn_name_attr = 'http_conn_id'
[docs] default_conn_name = 'http_default'
[docs] conn_type = 'http'
[docs] hook_name = 'HTTP'
def __init__( self, method: str = 'POST', http_conn_id: str = default_conn_name, auth_type: Any = HTTPBasicAuth, tcp_keep_alive: bool = True, tcp_keep_alive_idle: int = 120, tcp_keep_alive_count: int = 20, tcp_keep_alive_interval: int = 30, ) -> None: super().__init__() self.http_conn_id = http_conn_id self.method = method.upper() self.base_url: str = "" self._retry_obj: Callable[..., Any] self.auth_type: Any = auth_type self.tcp_keep_alive = tcp_keep_alive self.keep_alive_idle = tcp_keep_alive_idle self.keep_alive_count = tcp_keep_alive_count self.keep_alive_interval = tcp_keep_alive_interval # headers may be passed through directly or in the "extra" field in the connection # definition
[docs] def get_conn(self, headers: dict[Any, Any] | None = None) -> requests.Session: """ Returns http session for use with requests :param headers: additional headers to be passed through as a dictionary """ session = requests.Session() if self.http_conn_id: conn = self.get_connection(self.http_conn_id) if conn.host and "://" in conn.host: self.base_url = conn.host else: # schema defaults to HTTP schema = conn.schema if conn.schema else "http" host = conn.host if conn.host else "" self.base_url = schema + "://" + host if conn.port: self.base_url = self.base_url + ":" + str(conn.port) if conn.login: session.auth = self.auth_type(conn.login, conn.password) if conn.extra: try: session.headers.update(conn.extra_dejson) except TypeError: self.log.warning('Connection to %s has invalid extra field.', conn.host) if headers: session.headers.update(headers) return session
[docs] def run( self, endpoint: str | None = None, data: dict[str, Any] | str | None = None, headers: dict[str, Any] | None = None, extra_options: dict[str, Any] | None = None, **request_kwargs: Any, ) -> Any: r""" Performs the request :param endpoint: the endpoint to be called i.e. resource/v1/query? :param data: payload to be uploaded or request parameters :param headers: additional headers to be passed through as a dictionary :param extra_options: additional options to be used when executing the request i.e. {'check_response': False} to avoid checking raising exceptions on non 2XX or 3XX status codes :param request_kwargs: Additional kwargs to pass when creating a request. For example, ``run(json=obj)`` is passed as ``requests.Request(json=obj)`` """ extra_options = extra_options or {} session = self.get_conn(headers) url = self.url_from_endpoint(endpoint) if self.tcp_keep_alive: keep_alive_adapter = TCPKeepAliveAdapter( idle=self.keep_alive_idle, count=self.keep_alive_count, interval=self.keep_alive_interval ) session.mount(url, keep_alive_adapter) if self.method == 'GET': # GET uses params req = requests.Request(self.method, url, params=data, headers=headers, **request_kwargs) elif self.method == 'HEAD': # HEAD doesn't use params req = requests.Request(self.method, url, headers=headers, **request_kwargs) else: # Others use data req = requests.Request(self.method, url, data=data, headers=headers, **request_kwargs) prepped_request = session.prepare_request(req) self.log.info("Sending '%s' to url: %s", self.method, url) return self.run_and_check(session, prepped_request, extra_options)
[docs] def check_response(self, response: requests.Response) -> None: """ Checks the status code and raise an AirflowException exception on non 2XX or 3XX status codes :param response: A requests response object """ try: response.raise_for_status() except requests.exceptions.HTTPError: self.log.error("HTTP error: %s", response.reason) self.log.error(response.text) raise AirflowException(str(response.status_code) + ":" + response.reason)
[docs] def run_and_check( self, session: requests.Session, prepped_request: requests.PreparedRequest, extra_options: dict[Any, Any], ) -> Any: """ Grabs extra options like timeout and actually runs the request, checking for the result :param session: the session to be used to execute the request :param prepped_request: the prepared request generated in run() :param extra_options: additional options to be used when executing the request i.e. ``{'check_response': False}`` to avoid checking raising exceptions on non 2XX or 3XX status codes """ extra_options = extra_options or {} settings = session.merge_environment_settings( prepped_request.url, proxies=extra_options.get("proxies", {}), stream=extra_options.get("stream", False), verify=extra_options.get("verify"), cert=extra_options.get("cert"), ) # Send the request. send_kwargs: dict[str, Any] = { "timeout": extra_options.get("timeout"), "allow_redirects": extra_options.get("allow_redirects", True), } send_kwargs.update(settings) try: response = session.send(prepped_request, **send_kwargs) if extra_options.get('check_response', True): self.check_response(response) return response except requests.exceptions.ConnectionError as ex: self.log.warning('%s Tenacity will retry to execute the operation', ex) raise ex
[docs] def run_with_advanced_retry(self, _retry_args: dict[Any, Any], *args: Any, **kwargs: Any) -> Any: """ Runs Hook.run() with a Tenacity decorator attached to it. This is useful for connectors which might be disturbed by intermittent issues and should not instantly fail. :param _retry_args: Arguments which define the retry behaviour. See Tenacity documentation at https://github.com/jd/tenacity .. code-block:: python hook = HttpHook(http_conn_id="my_conn", method="GET") retry_args = dict( wait=tenacity.wait_exponential(), stop=tenacity.stop_after_attempt(10), retry=tenacity.retry_if_exception_type(Exception), ) hook.run_with_advanced_retry(endpoint="v1/test", _retry_args=retry_args) """ self._retry_obj = tenacity.Retrying(**_retry_args) return self._retry_obj(self.run, *args, **kwargs)
[docs] def url_from_endpoint(self, endpoint: str | None) -> str: """Combine base url with endpoint""" if self.base_url and not self.base_url.endswith('/') and endpoint and not endpoint.startswith('/'): return self.base_url + '/' + endpoint return (self.base_url or '') + (endpoint or '')
[docs] def test_connection(self): """Test HTTP Connection""" try: self.run() return True, 'Connection successfully tested' except Exception as e: return False, str(e)

Was this entry helpful?