#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import annotations
from typing import Any, Callable
import requests
import tenacity
from requests.auth import HTTPBasicAuth
from requests_toolbelt.adapters.socket_options import TCPKeepAliveAdapter
from airflow.exceptions import AirflowException
from airflow.hooks.base import BaseHook
[docs]class HttpHook(BaseHook):
"""
Interact with HTTP servers.
:param method: the API method to be called
:param http_conn_id: :ref:`http connection<howto/connection:http>` that has the base
API url i.e https://www.google.com/ and optional authentication credentials. Default
headers can also be specified in the Extra field in json format.
:param auth_type: The auth type for the service
:param tcp_keep_alive: Enable TCP Keep Alive for the connection.
:param tcp_keep_alive_idle: The TCP Keep Alive Idle parameter (corresponds to ``socket.TCP_KEEPIDLE``).
:param tcp_keep_alive_count: The TCP Keep Alive count parameter (corresponds to ``socket.TCP_KEEPCNT``)
:param tcp_keep_alive_interval: The TCP Keep Alive interval parameter (corresponds to
``socket.TCP_KEEPINTVL``)
"""
[docs] conn_name_attr = 'http_conn_id'
[docs] default_conn_name = 'http_default'
def __init__(
self,
method: str = 'POST',
http_conn_id: str = default_conn_name,
auth_type: Any = HTTPBasicAuth,
tcp_keep_alive: bool = True,
tcp_keep_alive_idle: int = 120,
tcp_keep_alive_count: int = 20,
tcp_keep_alive_interval: int = 30,
) -> None:
super().__init__()
self.http_conn_id = http_conn_id
self.method = method.upper()
self.base_url: str = ""
self._retry_obj: Callable[..., Any]
self.auth_type: Any = auth_type
self.tcp_keep_alive = tcp_keep_alive
self.keep_alive_idle = tcp_keep_alive_idle
self.keep_alive_count = tcp_keep_alive_count
self.keep_alive_interval = tcp_keep_alive_interval
# headers may be passed through directly or in the "extra" field in the connection
# definition
[docs] def get_conn(self, headers: dict[Any, Any] | None = None) -> requests.Session:
"""
Returns http session for use with requests
:param headers: additional headers to be passed through as a dictionary
"""
session = requests.Session()
if self.http_conn_id:
conn = self.get_connection(self.http_conn_id)
if conn.host and "://" in conn.host:
self.base_url = conn.host
else:
# schema defaults to HTTP
schema = conn.schema if conn.schema else "http"
host = conn.host if conn.host else ""
self.base_url = schema + "://" + host
if conn.port:
self.base_url = self.base_url + ":" + str(conn.port)
if conn.login:
session.auth = self.auth_type(conn.login, conn.password)
if conn.extra:
try:
session.headers.update(conn.extra_dejson)
except TypeError:
self.log.warning('Connection to %s has invalid extra field.', conn.host)
if headers:
session.headers.update(headers)
return session
[docs] def run(
self,
endpoint: str | None = None,
data: dict[str, Any] | str | None = None,
headers: dict[str, Any] | None = None,
extra_options: dict[str, Any] | None = None,
**request_kwargs: Any,
) -> Any:
r"""
Performs the request
:param endpoint: the endpoint to be called i.e. resource/v1/query?
:param data: payload to be uploaded or request parameters
:param headers: additional headers to be passed through as a dictionary
:param extra_options: additional options to be used when executing the request
i.e. {'check_response': False} to avoid checking raising exceptions on non
2XX or 3XX status codes
:param request_kwargs: Additional kwargs to pass when creating a request.
For example, ``run(json=obj)`` is passed as ``requests.Request(json=obj)``
"""
extra_options = extra_options or {}
session = self.get_conn(headers)
url = self.url_from_endpoint(endpoint)
if self.tcp_keep_alive:
keep_alive_adapter = TCPKeepAliveAdapter(
idle=self.keep_alive_idle, count=self.keep_alive_count, interval=self.keep_alive_interval
)
session.mount(url, keep_alive_adapter)
if self.method == 'GET':
# GET uses params
req = requests.Request(self.method, url, params=data, headers=headers, **request_kwargs)
elif self.method == 'HEAD':
# HEAD doesn't use params
req = requests.Request(self.method, url, headers=headers, **request_kwargs)
else:
# Others use data
req = requests.Request(self.method, url, data=data, headers=headers, **request_kwargs)
prepped_request = session.prepare_request(req)
self.log.info("Sending '%s' to url: %s", self.method, url)
return self.run_and_check(session, prepped_request, extra_options)
[docs] def check_response(self, response: requests.Response) -> None:
"""
Checks the status code and raise an AirflowException exception on non 2XX or 3XX
status codes
:param response: A requests response object
"""
try:
response.raise_for_status()
except requests.exceptions.HTTPError:
self.log.error("HTTP error: %s", response.reason)
self.log.error(response.text)
raise AirflowException(str(response.status_code) + ":" + response.reason)
[docs] def run_and_check(
self,
session: requests.Session,
prepped_request: requests.PreparedRequest,
extra_options: dict[Any, Any],
) -> Any:
"""
Grabs extra options like timeout and actually runs the request,
checking for the result
:param session: the session to be used to execute the request
:param prepped_request: the prepared request generated in run()
:param extra_options: additional options to be used when executing the request
i.e. ``{'check_response': False}`` to avoid checking raising exceptions on non 2XX
or 3XX status codes
"""
extra_options = extra_options or {}
settings = session.merge_environment_settings(
prepped_request.url,
proxies=extra_options.get("proxies", {}),
stream=extra_options.get("stream", False),
verify=extra_options.get("verify"),
cert=extra_options.get("cert"),
)
# Send the request.
send_kwargs: dict[str, Any] = {
"timeout": extra_options.get("timeout"),
"allow_redirects": extra_options.get("allow_redirects", True),
}
send_kwargs.update(settings)
try:
response = session.send(prepped_request, **send_kwargs)
if extra_options.get('check_response', True):
self.check_response(response)
return response
except requests.exceptions.ConnectionError as ex:
self.log.warning('%s Tenacity will retry to execute the operation', ex)
raise ex
[docs] def run_with_advanced_retry(self, _retry_args: dict[Any, Any], *args: Any, **kwargs: Any) -> Any:
"""
Runs Hook.run() with a Tenacity decorator attached to it. This is useful for
connectors which might be disturbed by intermittent issues and should not
instantly fail.
:param _retry_args: Arguments which define the retry behaviour.
See Tenacity documentation at https://github.com/jd/tenacity
.. code-block:: python
hook = HttpHook(http_conn_id="my_conn", method="GET")
retry_args = dict(
wait=tenacity.wait_exponential(),
stop=tenacity.stop_after_attempt(10),
retry=tenacity.retry_if_exception_type(Exception),
)
hook.run_with_advanced_retry(endpoint="v1/test", _retry_args=retry_args)
"""
self._retry_obj = tenacity.Retrying(**_retry_args)
return self._retry_obj(self.run, *args, **kwargs)
[docs] def url_from_endpoint(self, endpoint: str | None) -> str:
"""Combine base url with endpoint"""
if self.base_url and not self.base_url.endswith('/') and endpoint and not endpoint.startswith('/'):
return self.base_url + '/' + endpoint
return (self.base_url or '') + (endpoint or '')
[docs] def test_connection(self):
"""Test HTTP Connection"""
try:
self.run()
return True, 'Connection successfully tested'
except Exception as e:
return False, str(e)