Source code for airflow.providers.snowflake.transfers.s3_to_snowflake

#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

"""This module contains AWS S3 to Snowflake operator."""
from typing import Any, Optional

from airflow.models import BaseOperator
from airflow.providers.snowflake.hooks.snowflake import SnowflakeHook
from airflow.utils.decorators import apply_defaults


class S3ToSnowflakeOperator(BaseOperator):
    """
    Executes a COPY command to load files from S3 to Snowflake.

    .. seealso::
        For more information on how to use this operator, take a look at the guide:
        :ref:`howto/operator:S3ToSnowflakeOperator`

    :param s3_keys: reference to a list of S3 keys
    :type s3_keys: list
    :param table: reference to a specific table in the Snowflake database
    :type table: str
    :param schema: name of schema (will overwrite schema defined in connection)
    :type schema: str
    :param stage: reference to a specific Snowflake stage. If the stage's schema is not the same
        as the table's, it must be specified
    :type stage: str
    :param prefix: cloud storage location specified to limit the set of files to load
    :type prefix: str
    :param file_format: reference to a specific file format
    :type file_format: str
    :param warehouse: name of warehouse (will overwrite any warehouse defined in the connection's extra JSON)
    :type warehouse: str
    :param database: reference to a specific database in the Snowflake connection
    :type database: str
    :param columns_array: reference to a specific columns array in the Snowflake database
    :type columns_array: list
    :param autocommit: if True (default), enable autocommit on the Snowflake connection
    :type autocommit: bool
    :param snowflake_conn_id: reference to a specific Snowflake connection
    :type snowflake_conn_id: str
    :param role: name of role (will overwrite any role defined in the connection's extra JSON)
    :type role: str
    :param authenticator: authenticator for Snowflake.
        'snowflake' (default) to use the internal Snowflake authenticator
        'externalbrowser' to authenticate using your web browser and Okta, ADFS or any other SAML 2.0-compliant
        identity provider (IdP) that has been defined for your account
        'https://<your_okta_account_name>.okta.com' to authenticate through native Okta.
    :type authenticator: str
    :param session_parameters: you can set session-level parameters at the time you connect to Snowflake
    :type session_parameters: dict
    """

    @apply_defaults
    def __init__(
        self,
        *,
        s3_keys: Optional[list] = None,
        table: str,
        stage: str,
        prefix: Optional[str] = None,
        file_format: str,
        schema: str,  # TODO: shouldn't be required, rely on session/user defaults
        columns_array: Optional[list] = None,
        warehouse: Optional[str] = None,
        database: Optional[str] = None,
        autocommit: bool = True,
        snowflake_conn_id: str = 'snowflake_default',
        role: Optional[str] = None,
        authenticator: Optional[str] = None,
        session_parameters: Optional[dict] = None,
        **kwargs,
    ) -> None:
        super().__init__(**kwargs)
        self.s3_keys = s3_keys
        self.table = table
        self.warehouse = warehouse
        self.database = database
        self.stage = stage
        self.prefix = prefix
        self.file_format = file_format
        self.schema = schema
        self.columns_array = columns_array
        self.autocommit = autocommit
        self.snowflake_conn_id = snowflake_conn_id
        self.role = role
        self.authenticator = authenticator
        self.session_parameters = session_parameters
    def execute(self, context: Any) -> None:
        snowflake_hook = SnowflakeHook(
            snowflake_conn_id=self.snowflake_conn_id,
            warehouse=self.warehouse,
            database=self.database,
            role=self.role,
            schema=self.schema,
            authenticator=self.authenticator,
            session_parameters=self.session_parameters,
        )
        # Restrict the COPY to an explicit list of keys when provided;
        # otherwise all files under the stage/prefix location are loaded.
        files = ""
        if self.s3_keys:
            files = "files=({})".format(", ".join(f"'{key}'" for key in self.s3_keys))
        # we can extend this based on stage
        base_sql = """
        FROM @{stage}/{prefix}
        {files}
        file_format={file_format}
        """.format(
            stage=self.stage,
            prefix=(self.prefix if self.prefix else ""),
            files=files,
            file_format=self.file_format,
        )

        if self.columns_array:
            copy_query = """
            COPY INTO {schema}.{table}({columns})
            {base_sql}
            """.format(
                schema=self.schema, table=self.table, columns=",".join(self.columns_array), base_sql=base_sql
            )
        else:
            copy_query = f"""
            COPY INTO {self.schema}.{self.table}
            {base_sql}
            """
        # Strip the indentation introduced by the triple-quoted templates.
        copy_query = "\n".join(line.strip() for line in copy_query.splitlines())

        self.log.info('Executing COPY command...')
        snowflake_hook.run(copy_query, self.autocommit)
        self.log.info("COPY command completed")
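For reference, a minimal usage sketch follows. The DAG id, connection id, stage, table, schema, and file format names are hypothetical placeholders; note that ``file_format`` is interpolated verbatim into the COPY statement, so it may be either the name of a pre-defined Snowflake file format or an inline definition as shown here.

# A minimal usage sketch (not part of the module); all names below are hypothetical.
from datetime import datetime

from airflow import DAG
from airflow.providers.snowflake.transfers.s3_to_snowflake import S3ToSnowflakeOperator

with DAG(
    dag_id="s3_to_snowflake_example",
    start_date=datetime(2021, 1, 1),
    schedule_interval=None,
) as dag:
    load_orders = S3ToSnowflakeOperator(
        task_id="load_orders",
        s3_keys=["orders/2021-01-01.csv"],  # keys relative to the stage location
        table="ORDERS",
        schema="PUBLIC",
        stage="MY_S3_STAGE",  # an external stage pointing at the S3 bucket
        file_format="(type='CSV', field_delimiter=',', skip_header=1)",
        snowflake_conn_id="snowflake_default",
    )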
