Fast Pre-trained Model Deployment - The code only approach
Deploying pre-trained and foundation models quickly and programmatically, using Amazon SageMaker JumpStart and the SageMaker SDK.
pip
you can install the latest SageMaker SDK as follows:
!pip install sagemaker
1
2
3
4
5
6
import sagemaker
from sagemaker.jumpstart.notebook_utils import list_jumpstart_models
from sagemaker.jumpstart.filters import And, Or
from sagemaker import image_uris, model_uris, script_uris, hyperparameters
from sagemaker.utils import name_from_base
from sagemaker import get_execution_role
1
2
import json # Only used to make dict prints pretty
import boto3 # If this is not installed - pip install boto3
`list_jumpstart_models()` JumpStart notebook utility allows us to get a list of all the current JumpStart models. Let's get them all and count them!
2
3
# Fetch the IDs of all current JumpStart models, then count them.
all_models = list_jumpstart_models()
len(all_models)
1
> 620
1
2
3
4
# Restrict the listing to models whose framework is PyTorch, then count the matches.
filter_value = "framework == pytorch"
filtered_models = list_jumpstart_models(filter=filter_value)
len(filtered_models)
1
> 52
1
filtered_models
1
2
3
4
5
6
7
8
9
['pytorch-eqa-bert-base-cased',
'pytorch-eqa-bert-base-multilingual-cased',
'pytorch-eqa-bert-base-multilingual-uncased',
'pytorch-eqa-bert-base-uncased',
...
'pytorch-od1-fasterrcnn-resnet50-fpn',
'pytorch-tabtransformerclassification-model',
'pytorch-tabtransformerregression-model',
'pytorch-textgeneration1-alexa20b']
`pytorch` models for image classification (`ic`).
2
3
4
# And(...) combines both conditions: image classification ("ic") task and PyTorch framework.
filter_value = And("task == ic", "framework == pytorch")
filtered_models = list_jumpstart_models(filter=filter_value)
filtered_models
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
['pytorch-ic-alexnet',
'pytorch-ic-densenet121',
'pytorch-ic-densenet161',
'pytorch-ic-densenet169',
'pytorch-ic-densenet201',
'pytorch-ic-googlenet',
'pytorch-ic-mobilenet-v2',
'pytorch-ic-resnet101',
'pytorch-ic-resnet152',
'pytorch-ic-resnet18',
'pytorch-ic-resnet34',
'pytorch-ic-resnet50',
'pytorch-ic-resnext101-32x8d',
'pytorch-ic-resnext50-32x4d',
'pytorch-ic-shufflenet-v2-x1-0',
'pytorch-ic-squeezenet1-0',
'pytorch-ic-squeezenet1-1',
'pytorch-ic-vgg11',
'pytorch-ic-vgg11-bn',
'pytorch-ic-vgg13',
'pytorch-ic-vgg13-bn',
'pytorch-ic-vgg16',
'pytorch-ic-vgg16-bn',
'pytorch-ic-vgg19',
'pytorch-ic-vgg19-bn',
'pytorch-ic-wide-resnet101-2',
'pytorch-ic-wide-resnet50-2']
`all_models` (created above using `list_jumpstart_models()`), and parse it.
2
3
4
5
6
7
8
# Get a list of frameworks: the text before the first "-" of every model ID,
# de-duplicated while keeping the order in which each one first appears.
frameworks = list(dict.fromkeys(model_name.split('-')[0] for model_name in all_models))
frameworks
1
2
3
4
5
6
7
8
9
10
['autogluon',
'catboost',
'huggingface',
'lightgbm',
'model',
'mxnet',
'pytorch',
'sklearn',
'tensorflow',
'xgboost']
`model`, if you have a look, they include models from Stability AI, including their famous Stable Diffusion text-to-image models.
2
3
4
5
6
7
8
# Get a list of tasks: the second "-"-separated field of every model ID,
# de-duplicated while keeping the order in which each one first appears.
tasks = list(dict.fromkeys(model_name.split('-')[1] for model_name in all_models))
tasks
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
['classification',
'regression',
'eqa',
'fillmask',
'ner',
'spc',
'summarization',
'tc',
'text2text',
'textgeneration',
'translation',
'txt2img',
'zstc',
'inpainting',
'upscaling',
'is',
'od',
'semseg',
'tcembedding',
'ic',
'od1',
'tabtransformerclassification',
'tabtransformerregression',
'textgeneration1',
'audioembedding',
'icembedding']
1
2
3
4
# Text-to-image ("txt2img") models published under the "model" framework prefix.
filter_value = And("task == txt2img", "framework == model")
filtered_models = list_jumpstart_models(filter=filter_value)
filtered_models
1
2
3
4
5
['model-txt2img-stabilityai-stable-diffusion-v1-4',
'model-txt2img-stabilityai-stable-diffusion-v1-4-fp16',
'model-txt2img-stabilityai-stable-diffusion-v2',
'model-txt2img-stabilityai-stable-diffusion-v2-1-base',
'model-txt2img-stabilityai-stable-diffusion-v2-fp16']
1
2
model_id = 'model-txt2img-stabilityai-stable-diffusion-v2-1-base' # Replace with the model of your choice.
model_version = "*" # Latest
1
2
scope = 'training' # training | inference
instance_type = 'ml.p3.2xlarge' # https://aws.amazon.com/sagemaker/pricing/instance-types
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
# Arguments shared by every JumpStart lookup below.
model_ref = {"model_id": model_id, "model_version": model_version}

# Container image (in Amazon ECR) compatible with the selected model.
image_uri = image_uris.retrieve(
    region=None,
    framework=None,  # automatically inferred from model_id
    image_scope=scope,
    instance_type=instance_type,
    **model_ref,
)
print("image_uri: {}".format(image_uri))

# Pre-written script package (in S3) for the selected scope.
source_uri = script_uris.retrieve(script_scope=scope, **model_ref)
print("source_uri: {}".format(source_uri))

# Model artifact (in S3) for the selected scope.
model_uri = model_uris.retrieve(model_scope=scope, **model_ref)
print("model_uri: {}".format(model_uri))
1
2
3
4
5
image_uri: 763104351884.dkr.ecr.us-east-1.amazonaws.com/huggingface-pytorch-training:1.10.2-transformers4.17.0-gpu-py38-cu113-ubuntu20.04
source_uri: s3://jumpstart-cache-prod-us-east-1/source-directory-tarballs/stabilityai/transfer_learning/txt2img/prepack/v1.0.3/sourcedir.tar.gz
model_uri: s3://jumpstart-cache-prod-us-east-1/stabilityai-training/train-model-txt2img-stabilityai-stable-diffusion-v2-1-base.tar.gz
`image_uri` is the location in the Amazon Elastic Container Registry for a container image that is compatible with the model we select.
`source_uri` is the location in S3 of pre-written code to perform inference or training (when supported) as per the scope we selected. Feel free to download this code, take a look, and even make some changes. If you do change the code you will need to re-compress it (tar.gz) and make it available in an S3 bucket you control.
`model_uri` is the location in S3 of the model itself.
2
3
4
5
# Default hyperparameters for the selected model. The result is a dict of
# string values that is passed to the Estimator's `hyperparameters` argument.
training_hyperparameters = hyperparameters.retrieve_default(
model_id=model_id, model_version=model_version
)
training_hyperparameters
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
{'epochs': '20',
'max_steps': 'None',
'batch_size': '1',
'with_prior_preservation': 'False',
'num_class_images': '100',
'learning_rate': '2e-06',
'prior_loss_weight': '1.0',
'center_crop': 'False',
'lr_scheduler': 'constant',
'adam_weight_decay': '0.01',
'adam_beta1': '0.9',
'adam_beta2': '0.999',
'adam_epsilon': '1e-08',
'gradient_accumulation_steps': '1',
'max_grad_norm': '1.0',
'seed': '0'}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
from sagemaker.estimator import Estimator

aws_role = get_execution_role()
sess = sagemaker.Session()
bucket = sess.default_bucket()
name = name_from_base("jumpstart-{}-{}".format(model_id, scope))

# default_bucket() returns only the bucket name, but the Estimator expects
# output_path to be a full S3 URI, so build one under the job's name.
output_path = "s3://{}/{}/output".format(bucket, name)

# Create SageMaker Estimator instance
estimator = Estimator(
    role=aws_role,
    image_uri=image_uri,
    source_dir=source_uri,
    model_uri=model_uri,
    # Entry-point present in source_uri, if in doubt,
    # download the script package and review contents.
    entry_point="train.py",
    instance_count=1,
    instance_type=instance_type,
    max_run=360000,  # seconds (100 hours)
    hyperparameters=training_hyperparameters,
    output_path=output_path,
    base_job_name=name,
)
1
# NOTE: training_dataset_s3_path is not defined in the code shown here; set it to the
# S3 location of your training data before running this cell.
estimator.fit({"training": training_dataset_s3_path}, logs=True)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
# Model is documented under sagemaker.model; import it from there rather than
# relying on sagemaker.estimator happening to expose the same name.
from sagemaker.model import Model
from sagemaker.predictor import Predictor

aws_role = get_execution_role()
name = name_from_base("jumpstart-{}-{}".format(model_id, scope))

# NOTE(review): image_uri, source_uri and model_uri should have been retrieved with
# scope = 'inference' for deployment — confirm scope was switched before this cell.
# Create the SageMaker model instance
model = Model(
    image_uri=image_uri,
    source_dir=source_uri,
    model_data=model_uri,
    # Entry-point present in source_uri, if in doubt,
    # download the script package and review contents.
    entry_point="inference.py",
    role=aws_role,
    predictor_cls=Predictor,
    name=name,
)
`deploy` on the model object. We pass in the number of instances we want. This step will take a few minutes while the infrastructure is deployed.
2
3
4
5
6
7
8
9
# Deploy the model. The Predictor class must be passed when deploying through the
# generic Model class so that inference can be run through the SageMaker API.
model_predictor = model.deploy(
initial_instance_count=1,
instance_type=instance_type,
predictor_cls=Predictor,
endpoint_name=name,
)
1
2
3
# Delete the SageMaker model first, then the endpoint: delete_model() finds the
# model through the endpoint configuration, which delete_endpoint() removes by default.
model_predictor.delete_model()
model_predictor.delete_endpoint()
`model_predictor`, navigate to Amazon SageMaker in the console, click `Inference` from the left-hand menu, and use `Models` and `Endpoints` to delete the resources.
2
3
4
5
6
7
# S3 prefix holding the batch input; replace [input_bucket] with your own bucket.
batch_input = "s3://[input_bucket]/input/"

# Build a batch transformer from the model and run it over the input prefix.
transformer = model.transformer(instance_count=1, instance_type=instance_type)
transformer.transform(
    batch_input,
    content_type="application/json",  # Example, again look at the inference code to get this value.
)
transformer.wait()
1
2
# Delete the SageMaker model used by the batch transformer
transformer.delete_model()
`transformer`, navigate to Amazon SageMaker in the console, click `Inference` from the left-hand menu, and use `Models` to delete the resource.

Any opinions in this post are those of the individual author and may not reflect the opinions of AWS.