tests.system.providers.amazon.aws.example_sagemaker

Module Contents

Functions

set_up(env_id, role_arn)

delete_ecr_repository(repository_name)

delete_logs(env_id)

Attributes

DAG_ID

ROLE_ARN_KEY

sys_test_context_task

KNN_IMAGES_BY_REGION

DATASET

SAMPLE_SIZE

PREPROCESS_SCRIPT_TEMPLATE

test_context

test_run

tests.system.providers.amazon.aws.example_sagemaker.DAG_ID = 'example_sagemaker'[source]
tests.system.providers.amazon.aws.example_sagemaker.ROLE_ARN_KEY = 'ROLE_ARN'[source]
tests.system.providers.amazon.aws.example_sagemaker.sys_test_context_task[source]
tests.system.providers.amazon.aws.example_sagemaker.KNN_IMAGES_BY_REGION[source]
tests.system.providers.amazon.aws.example_sagemaker.DATASET = Multiline-String[source]
Show Value
1        5.1,3.5,1.4,0.2,Iris-setosa
2        4.9,3.0,1.4,0.2,Iris-setosa
3        7.0,3.2,4.7,1.4,Iris-versicolor
4        6.4,3.2,4.5,1.5,Iris-versicolor
5        4.9,2.5,4.5,1.7,Iris-virginica
6        7.3,2.9,6.3,1.8,Iris-virginica
tests.system.providers.amazon.aws.example_sagemaker.SAMPLE_SIZE[source]
tests.system.providers.amazon.aws.example_sagemaker.PREPROCESS_SCRIPT_TEMPLATE = Multiline-String[source]
Show Value
 1  import boto3
 2  import numpy as np
 3  import pandas as pd
 4
 5  def main():
 6      # Load the Iris dataset from {input_path}/input.csv, split it into train/test
 7      # subsets, and write them to {output_path}/ for the Processing Operator.
 8
 9      columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species']
10      iris = pd.read_csv('{input_path}/input.csv', names=columns)
11
12      # Process data
13      iris['species'] = iris['species'].replace({{'Iris-virginica': 0, 'Iris-versicolor': 1, 'Iris-setosa': 2}})
14      iris = iris[['species', 'sepal_length', 'sepal_width', 'petal_length', 'petal_width']]
15
16      # Split into test and train data
17      iris_train, iris_test = np.split(
18          iris.sample(frac=1, random_state=np.random.RandomState()), [int(0.7 * len(iris))]
19      )
20
21      # Remove the "answers" from the test set
22      iris_test.drop(['species'], axis=1, inplace=True)
23
24      # Write the splits to disk
25      iris_train.to_csv('{output_path}/train.csv', index=False, header=False)
26      iris_test.to_csv('{output_path}/test.csv', index=False, header=False)
27
28      print('Preprocessing Done.')
29
30  if __name__ == "__main__":
31      main()
tests.system.providers.amazon.aws.example_sagemaker.set_up(env_id, role_arn)[source]
tests.system.providers.amazon.aws.example_sagemaker.delete_ecr_repository(repository_name)[source]
tests.system.providers.amazon.aws.example_sagemaker.delete_logs(env_id)[source]
tests.system.providers.amazon.aws.example_sagemaker.test_context[source]
tests.system.providers.amazon.aws.example_sagemaker.test_run[source]

Was this entry helpful?