Bring Your Own Machine Learning Code to AWS
Run your training code on AWS with minimal effort by bringing your own script or container.
import boto3
import pandas as pd
import numpy as np

s3 = boto3.client("s3")
s3.download_file("sagemaker-sample-files", "datasets/tabular/iris/iris.data", "iris.data")

df = pd.read_csv(
    "iris.data", header=None, names=["sepal_len", "sepal_wid", "petal_len", "petal_wid", "class"]
)
df.head()
# Convert the three classes from strings to integers in {0,1,2}
df["class_cat"] = df["class"].astype("category").cat.codes
categories_map = dict(enumerate(df["class"].astype("category").cat.categories))
print(categories_map)
df.head()
# Split the data into an 80/20 train-test split
num_samples = df.shape[0]
split = round(num_samples * 0.8)
train = df.iloc[:split, :]
test = df.iloc[split:, :]
print("{} train, {} test".format(split, num_samples - split))
# Write train and test CSV files
train.to_csv("train.csv", index=False)
test.to_csv("test.csv", index=False)
# Create a sagemaker session to upload data to S3
import sagemaker
sagemaker_session = sagemaker.Session()
# Upload data to default S3 bucket
prefix = "DEMO-sklearn-iris"
training_input_path = sagemaker_session.upload_data("train.csv", key_prefix=prefix + "/training")
- The AWS IAM role used to run the training job (one way to retrieve it is sketched after this list)
- The instance configuration (count and type)
- The scikit-learn framework version to use. Many other supported versions are available open source at https://github.com/aws/sagemaker-scikit-learn-container.
- The model hyperparameters
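The IAM role is not defined in the earlier snippets. A minimal sketch for retrieving it, assuming the code runs in a SageMaker Studio or notebook environment with an attached execution role (the fallback ARN below is only a placeholder):
from sagemaker import get_execution_role

try:
    # Works inside SageMaker Studio/notebooks, where an execution role is attached
    role = get_execution_role()
except ValueError:
    # Outside SageMaker, fall back to the ARN of an IAM role you created (placeholder)
    role = "arn:aws:iam::<account-id>:role/<sagemaker-execution-role>"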
from sagemaker.sklearn import SKLearn

sk_estimator = SKLearn(
    entry_point="train.py",
    role=role,
    instance_count=1,
    instance_type="ml.c5.xlarge",
    py_version="py3",
    framework_version="1.0-1",
    hyperparameters={"estimators": 20},
)
import argparse, os
import boto3
import json
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn import metrics
import joblib

if __name__ == "__main__":
    # Pass in environment variables and hyperparameters
    parser = argparse.ArgumentParser()

    # Hyperparameters
    parser.add_argument("--estimators", type=int, default=15)

    # sm_model_dir: model artifacts stored here after training
    parser.add_argument("--sm-model-dir", type=str, default=os.environ.get("SM_MODEL_DIR"))
    # model_dir: S3 model location passed by the SageMaker Python SDK
    parser.add_argument("--model_dir", type=str, default=None)
    parser.add_argument("--train", type=str, default=os.environ.get("SM_CHANNEL_TRAIN"))

    args, _ = parser.parse_known_args()
    estimators = args.estimators
    model_dir = args.model_dir
    sm_model_dir = args.sm_model_dir
    training_dir = args.train

    # Read in data
    df = pd.read_csv(training_dir + "/train.csv", sep=",")

    # Preprocess data
    X = df.drop(["class", "class_cat"], axis=1)
    y = df["class_cat"]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)

    # Build model
    regressor = RandomForestRegressor(n_estimators=estimators)
    regressor.fit(X_train, y_train)
    y_pred = regressor.predict(X_test)

    # Save model
    joblib.dump(regressor, os.path.join(args.sm_model_dir, "model.joblib"))
# Train the estimator
sk_estimator.fit({"train": training_input_path})
- Use a SageMaker Estimator with a custom Amazon ECR image built for the purpose (a minimal Dockerfile sketch follows this list)
- Use the SageMaker-provided remote decorator
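For the first option, you need a training image pushed to Amazon ECR. A minimal Dockerfile sketch, assuming the open source sagemaker-training toolkit is used to launch the script (the base image and package list are illustrative choices, not requirements):
# Minimal training image sketch (illustrative, not the only way to build one)
FROM python:3.10-slim

# The sagemaker-training toolkit lets SageMaker launch the script inside the container
RUN pip install --no-cache-dir scikit-learn pandas joblib sagemaker-training

# /opt/ml/code is the standard location for user code in SageMaker containers
COPY train.py /opt/ml/code/train.py

# Tell the toolkit which script to run as the entry point
ENV SAGEMAKER_PROGRAM train.py
After building and pushing the image to the repository below, the Estimator can reference it by URI.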
import boto3

account_id = boto3.client('sts').get_caller_identity().get('Account')
ecr_repository = 'scikit-learn-custom'
tag = ':latest'

region = boto3.session.Session().region_name
uri_suffix = 'amazonaws.com'
if region in ['cn-north-1', 'cn-northwest-1']:
    uri_suffix = 'amazonaws.com.cn'

byoc_image_uri = '{}.dkr.ecr.{}.{}/{}'.format(account_id, region, uri_suffix, ecr_repository + tag)
import sagemaker
from sagemaker import get_execution_role
from sagemaker.estimator import Estimator

estimator = Estimator(
    image_uri=byoc_image_uri,
    role=get_execution_role(),
    base_job_name='scikit-custom-container-test-job',
    instance_count=1,
    instance_type='ml.c5.xlarge',
)

# Train the estimator
estimator.fit({"train": training_input_path})
- Amazon SageMaker Studio
- Amazon SageMaker notebook
- Local IDE
import json
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn import metrics
import joblib
from sagemaker.remote_function import remote


# Run this function remotely as a SageMaker training job
@remote
def perform_training(training_dir, estimators):
    # Read in data
    df = pd.read_csv(training_dir + "/train.csv", sep=",")

    # Preprocess data
    X = df.drop(["class", "class_cat"], axis=1)
    y = df["class_cat"]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)

    # Build model
    regressor = RandomForestRegressor(n_estimators=estimators)
    regressor.fit(X_train, y_train)
    y_pred = regressor.predict(X_test)

    # Save model
    joblib.dump(regressor, "model.joblib")
name: sagemaker_example
channels:
  - conda-forge
dependencies:
  - python=3.10
  - pandas
  - numpy
  - scipy
  - scikit-learn
  - s3fs==0.4.2
  - pip:
      - sagemaker
input_data_path = "<s3_uri_path_to_data>"
perform_training(input_data_path, 20)
- Dependencies: Path to a requirements.txt file or a Conda environment YAML, as demonstrated in the previous example.
- EnvironmentVariables: Environment variables available to the script.
- ImageUri: Amazon ECR image location to run the job.
- InstanceType: Type of instance used for the Amazon SageMaker training job.
- RoleArn: IAM role used to run the Amazon SageMaker training job.
- S3KmsKeyId: ID of the KMS key used to encrypt the output data.
- S3RootUri: S3 location used to store output artifacts.
- SecurityGroupIds and Subnets: Networking configuration for the SageMaker training job.
- Tags: Tags used for the SageMaker training job.
SchemaVersion: '1.0'
SageMaker:
  PythonSDK:
    Modules:
      RemoteFunction:
        Dependencies: path/to/requirements.txt or path/to/environment.yml
        EnvironmentVariables: {"EnvVarKey": "EnvVarValue"}
        ImageUri: 366666666666.dkr.ecr.us-west-2.amazonaws.com/my-image:latest
        InstanceType: ml.m5.large
        RoleArn: arn:aws:iam::366666666666:role/MyRole
        S3KmsKeyId: somekmskeyid
        S3RootUri: s3://my-bucket/my-project
        SecurityGroupIds:
          - sg123
        Subnets:
          - subnet-1234
        Tags:
          - {"Key": "someTagKey", "Value": "someTagValue"}
- Use Amazon SageMaker built-in algorithms
- Bring your own script and leverage an Amazon SageMaker provided framework container
- Bring your own container, either by building the container yourself or by leveraging the @remote decorator.
Any opinions in this post are those of the individual author and may not reflect the opinions of AWS.