Source code for airflow.providers.slack.transfers.sql_to_slack
# Licensed to the Apache Software Foundation (ASF) under one# or more contributor license agreements. See the NOTICE file# distributed with this work for additional information# regarding copyright ownership. The ASF licenses this file# to you under the Apache License, Version 2.0 (the# "License"); you may not use this file except in compliance# with the License. You may obtain a copy of the License at## http://www.apache.org/licenses/LICENSE-2.0## Unless required by applicable law or agreed to in writing,# software distributed under the License is distributed on an# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY# KIND, either express or implied. See the License for the# specific language governing permissions and limitations# under the License.from__future__importannotationsfromtempfileimportNamedTemporaryFilefromtypingimportTYPE_CHECKING,Any,Iterable,Mapping,Sequencefromtabulateimporttabulatefromairflow.exceptionsimportAirflowExceptionfromairflow.hooks.baseimportBaseHookfromairflow.modelsimportBaseOperatorfromairflow.providers.slack.hooks.slackimportSlackHookfromairflow.providers.slack.hooks.slack_webhookimportSlackWebhookHookfromairflow.providers.slack.utilsimportparse_filenameifTYPE_CHECKING:importpandasaspdfromslack_sdk.http_retryimportRetryHandlerfromairflow.providers.common.sql.hooks.sqlimportDbApiHookfromairflow.utils.contextimportContext
class BaseSqlToSlackOperator(BaseOperator):
    """
    Shared SQL plumbing for the operators that transfer query results to Slack.

    Stores the SQL and Slack client settings and provides helpers that resolve the
    database hook and run the query into a pandas dataframe.

    :param sql: The SQL query to be executed
    :param sql_conn_id: reference to a specific DB-API Connection.
    :param sql_hook_params: Extra config params to be passed to the underlying hook.
        Should match the desired hook constructor params.
    :param parameters: The parameters to pass to the SQL query.
    :param slack_proxy: Proxy to make the Slack Incoming Webhook / API calls. Optional
    :param slack_timeout: The maximum number of seconds the client will wait to connect
        and receive a response from Slack. Optional
    :param slack_retry_handlers: List of handlers to customize retry logic. Optional
    """

    def __init__(
        self,
        *,
        sql: str,
        sql_conn_id: str,
        sql_hook_params: dict | None = None,
        parameters: Iterable | Mapping[str, Any] | None = None,
        slack_proxy: str | None = None,
        slack_timeout: int | None = None,
        slack_retry_handlers: list[RetryHandler] | None = None,
        **kwargs,
    ):
        super().__init__(**kwargs)
        # Database side of the transfer.
        self.sql_conn_id = sql_conn_id
        self.sql_hook_params = sql_hook_params
        self.sql = sql
        self.parameters = parameters
        # Slack client options; subclasses forward these to whichever Slack hook they build.
        self.slack_proxy = slack_proxy
        self.slack_timeout = slack_timeout
        self.slack_retry_handlers = slack_retry_handlers

    def _get_hook(self) -> DbApiHook:
        """
        Resolve the hook for ``sql_conn_id``.

        :raises AirflowException: if the resolved hook has no callable ``get_pandas_df``.
        """
        self.log.debug("Get connection for %s", self.sql_conn_id)
        connection = BaseHook.get_connection(self.sql_conn_id)
        db_hook = connection.get_hook(hook_params=self.sql_hook_params)
        pandas_getter = getattr(db_hook, "get_pandas_df", None)
        if callable(pandas_getter):
            return db_hook
        raise AirflowException(
            "This hook is not supported. The hook class must have get_pandas_df method."
        )

    def _get_query_results(self) -> pd.DataFrame:
        """Run ``self.sql`` with ``self.parameters`` and return the result as a dataframe."""
        db_hook = self._get_hook()
        self.log.info("Running SQL query: %s", self.sql)
        return db_hook.get_pandas_df(self.sql, parameters=self.parameters)
class SqlToSlackOperator(BaseSqlToSlackOperator):
    """
    Executes an SQL statement in a given SQL connection and sends the results to Slack.

    The results of the query are rendered into the 'slack_message' parameter as a Pandas
    dataframe using a JINJA variable called '{{ results_df }}'. The 'results_df' variable
    name can be changed by specifying a different 'results_df_name' parameter. The Tabulate
    library is added to the JINJA environment as a filter to allow the dataframe to be
    rendered nicely. For example, set 'slack_message' to
    {{ results_df | tabulate(tablefmt="pretty", headers="keys") }} to send the results to
    Slack as an ascii rendered table.

    .. seealso::
        For more information on how to use this operator, take a look at the guide:
        :ref:`howto/operator:SqlToSlackOperator`

    :param sql: The SQL query to be executed (templated)
    :param slack_message: The templated Slack message to send with the data returned from
        the SQL connection. You can use the default JINJA variable {{ results_df }} to
        access the pandas dataframe containing the SQL results
    :param sql_conn_id: reference to a specific database.
    :param sql_hook_params: Extra config params to be passed to the underlying hook.
        Should match the desired hook constructor params.
    :param slack_conn_id: The connection id for Slack.
    :param slack_channel: The channel to send message. Override default from Slack connection.
    :param results_df_name: The name of the JINJA template's dataframe variable,
        default is 'results_df'
    :param parameters: The parameters to pass to the SQL query
    :param kwargs: Forwarded to the parent constructor. The Slack client options read by
        this operator (``slack_proxy``, ``slack_timeout``, ``slack_retry_handlers``) are
        passed this way.
    """

    # ``render_template_fields`` filters this tuple by name to postpone "slack_message",
    # so both templated parameters have to be declared here for the two-phase rendering
    # to have anything to act on.
    template_fields: Sequence[str] = ("sql", "slack_message")
    # Number of completed ``render_template_fields`` calls. It is a class-level default so
    # the first call can read it before any instance attribute exists; ``+=`` then stores
    # the running count on the instance.
    times_rendered = 0

    def __init__(
        self,
        *,
        sql: str,
        sql_conn_id: str,
        slack_conn_id: str,
        sql_hook_params: dict | None = None,
        slack_channel: str | None = None,
        slack_message: str,
        results_df_name: str = "results_df",
        parameters: Iterable | Mapping[str, Any] | None = None,
        **kwargs,
    ) -> None:
        super().__init__(
            sql=sql,
            sql_conn_id=sql_conn_id,
            sql_hook_params=sql_hook_params,
            parameters=parameters,
            **kwargs,
        )
        self.slack_conn_id = slack_conn_id
        self.slack_channel = slack_channel
        self.slack_message = slack_message
        self.results_df_name = results_df_name
        # Kept for backward compatibility with code that inspects the extra arguments.
        self.kwargs = kwargs

    def _render_and_send_slack_message(self, context, df) -> None:
        """
        Render the message against the query results and post it to Slack.

        :param context: Task context; mutated to expose ``df`` under ``results_df_name``.
        :param df: Dataframe holding the SQL results.
        """
        # Put the dataframe into the context and render the JINJA template fields.
        # This is the second render pass, so "slack_message" is included this time.
        context[self.results_df_name] = df
        self.render_template_fields(context)

        slack_hook = self._get_slack_hook()
        self.log.info("Sending slack message: %s", self.slack_message)
        slack_hook.send(text=self.slack_message, channel=self.slack_channel)

    def _get_slack_hook(self) -> SlackWebhookHook:
        """Build the webhook hook from the Slack connection id and client options."""
        return SlackWebhookHook(
            slack_webhook_conn_id=self.slack_conn_id,
            proxy=self.slack_proxy,
            timeout=self.slack_timeout,
            retry_handlers=self.slack_retry_handlers,
        )

    def render_template_fields(self, context, jinja_env=None) -> None:
        """
        Render the template fields, postponing ``slack_message`` on the first call.

        :param context: Context passed to the template renderer.
        :param jinja_env: Optional JINJA environment; when falsy, the one returned by
            ``get_template_env()`` is used. The ``tabulate`` filter is registered on it.
        """
        # If this is the first render of the template fields, exclude slack_message from
        # rendering since the SQL results haven't been retrieved yet.
        if self.times_rendered == 0:
            fields_to_render: Iterable[str] = tuple(
                field for field in self.template_fields if field != "slack_message"
            )
        else:
            fields_to_render = self.template_fields

        if not jinja_env:
            jinja_env = self.get_template_env()

        # Add the tabulate library into the JINJA environment.
        jinja_env.filters["tabulate"] = tabulate

        self._do_render_template_fields(self, fields_to_render, context, jinja_env, set())
        self.times_rendered += 1

    def execute(self, context: Context) -> None:
        """
        Validate the inputs, run the query and send the rendered message.

        :param context: Task context handed to the message renderer.
        :raises AirflowException: if ``sql`` is not a string, or if ``sql`` or
            ``slack_message`` is empty or whitespace only.
        """
        # The type check also rejects ``None``, so no separate None test is needed for sql.
        if not isinstance(self.sql, str):
            raise AirflowException("Expected 'sql' parameter should be a string.")
        if not self.sql.strip():
            raise AirflowException("Expected 'sql' parameter is missing.")
        if self.slack_message is None or self.slack_message.strip() == "":
            raise AirflowException("Expected 'slack_message' parameter is missing.")

        df = self._get_query_results()
        self._render_and_send_slack_message(context, df)

        self.log.debug("Finished sending SQL data to Slack")
class SqlToSlackApiFileOperator(BaseSqlToSlackOperator):
    """
    Executes an SQL statement in a given SQL connection and sends the results to Slack API as file.

    :param sql: The SQL query to be executed
    :param sql_conn_id: reference to a specific DB-API Connection.
    :param slack_conn_id: :ref:`Slack API Connection <howto/connection:slack>`.
    :param slack_filename: Filename for display in slack.
        Should contain a supported extension, as listed in ``SUPPORTED_FILE_FORMATS``.
        It is also possible to set compression in extension:
        ``filename.csv.gzip``, ``filename.json.zip``, etc.
    :param sql_hook_params: Extra config params to be passed to the underlying hook.
        Should match the desired hook constructor params.
    :param parameters: The parameters to pass to the SQL query.
    :param slack_channels: Comma-separated list of channel names or IDs where the file will be shared.
        If this parameter is omitted, the file is sent to the workspace.
    :param slack_initial_comment: The message text introducing the file in specified ``slack_channels``.
    :param slack_title: Title of file.
    :param slack_base_url: A string representing the Slack API base URL. Optional
    :param df_kwargs: Keyword arguments forwarded to ``pandas.DataFrame.to_{format}()`` method.

    Example:
     .. code-block:: python

        SqlToSlackApiFileOperator(
            task_id="sql_to_slack",
            sql="SELECT 1 a, 2 b, 3 c",
            sql_conn_id="sql-connection",
            slack_conn_id="slack-api-connection",
            slack_filename="awesome.json.gz",
            slack_channels="#random,#general",
            slack_initial_comment="Awesome load to compressed multiline JSON.",
            df_kwargs={
                "orient": "records",
                "lines": True,
            },
        )
    """

    # NOTE(review): the constructor and the ``SUPPORTED_FILE_FORMATS`` constant are not
    # defined in this class body as shown; ``execute`` reads them (and the ``slack_*`` /
    # ``df_kwargs`` attributes) from ``self`` - confirm where they are declared.

    def execute(self, context: Context) -> None:
        """
        Run the query, dump the dataframe to a temporary file and upload it to Slack.

        :param context: Task context (not used by this method).
        :raises AirflowException: if the format parsed from ``slack_filename`` has no
            writer implemented here.
        """
        # Local imports keep this method self-contained.
        import os
        from tempfile import TemporaryDirectory

        # Parse file format from filename
        output_file_format, _ = parse_filename(
            filename=self.slack_filename,
            supported_file_formats=self.SUPPORTED_FILE_FORMATS,
        )
        output_file_format = output_file_format.upper()

        slack_hook = SlackHook(
            slack_conn_id=self.slack_conn_id,
            base_url=self.slack_base_url,
            timeout=self.slack_timeout,
            proxy=self.slack_proxy,
            retry_handlers=self.slack_retry_handlers,
        )

        # pandas opens the target path itself, in whatever mode the format needs, so only
        # a private location is required here, not an open handle. A temporary directory
        # is used instead of closing a NamedTemporaryFile early: closing such a file
        # deletes it, after which the file pandas re-creates at that path is never
        # removed. The directory and everything in it are removed on exit, including when
        # an exception is raised.
        with TemporaryDirectory(prefix="sql_to_slack_") as tmp_dir:
            # Keep the original name as the suffix so the file extension is preserved.
            output_file_name = os.path.join(
                tmp_dir, f"output_{os.path.basename(self.slack_filename)}"
            )

            df_result = self._get_query_results()
            if output_file_format == "CSV":
                df_result.to_csv(output_file_name, **self.df_kwargs)
            elif output_file_format == "JSON":
                df_result.to_json(output_file_name, **self.df_kwargs)
            elif output_file_format == "HTML":
                df_result.to_html(output_file_name, **self.df_kwargs)
            else:
                # Not expected that this error happen. This only possible
                # if SUPPORTED_FILE_FORMATS extended and no actual implementation for specific format.
                raise AirflowException(f"Unexpected output file format: {output_file_format}")

            slack_hook.send_file(
                channels=self.slack_channels,
                file=output_file_name,
                filename=self.slack_filename,
                initial_comment=self.slack_initial_comment,
                title=self.slack_title,
            )