
Run a PyTorch Model on AWS Inferentia2
Build a simple multilayer perceptron (MLP) model in PyTorch and run it on AWS Inferentia2
import torch
import torch.nn as nn

class HousingPriceModel(nn.Module):
    def __init__(self, input_size):
        super().__init__()
        self.layers = nn.Sequential(
            nn.Linear(input_size, 256),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(64, 1)  # predict a single value
        )

    def forward(self, x):
        return self.layers(x)
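Before training, it's worth a quick smoke test that the layers wire up. Here's a minimal check (the input size of 11 matches the cleaned dataset used later in this post):

model = HousingPriceModel(11)
# A batch of 4 random examples with 11 features each
output = model(torch.randn(4, 11))
print(output.shape)  # torch.Size([4, 1])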
import numpy as np
from torch.optim import Adam

def train_model(model, train_loader, val_loader, epochs=10, lr=1e-3):
    loss_fn = nn.MSELoss()
    optimizer = Adam(model.parameters(), lr=lr)
    for epoch in range(epochs):
        train_loss, val_loss = 0.0, 0.0
        # Train
        model.train()
        for inputs, target in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = loss_fn(outputs, target.unsqueeze(1))
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        # Validation
        model.eval()
        with torch.no_grad():
            for inputs, target in val_loader:
                outputs = model(inputs)
                loss = loss_fn(outputs, target.unsqueeze(1))
                val_loss += loss.item()
        # Calculate RMSE from the mean MSE across batches
        rmse_train_loss = np.sqrt(train_loss / len(train_loader))
        rmse_val_loss = np.sqrt(val_loss / len(val_loader))
        # Print metrics
        print(f"Epoch [{epoch+1}/{epochs}], Training RMSE: {rmse_train_loss:.4f}")
        print(f"Epoch [{epoch+1}/{epochs}], Validation RMSE: {rmse_val_loss:.4f}")
    # Save our checkpoint
    checkpoint = {'state_dict': model.state_dict()}
    torch.save(checkpoint, 'model.pt')
import pandas as pd
from datasets import load_dataset
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader

# Download the dataset and put it in a DataFrame
data = load_dataset("leostelon/california-housing")['train']
df = pd.DataFrame(data)
# Split the DataFrame into training and validation sets
train_df, val_df = train_test_split(df, test_size=0.2, random_state=42)
# Clean the datasets (see the sketch of clean_dataset below)
train_dataset = clean_dataset(train_df)
val_dataset = clean_dataset(val_df)
# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16)
# Get the input size from the feature matrix
input_size = train_dataset.X.shape[1]
# Instantiate the model
model = HousingPriceModel(input_size)
# Train the model
train_model(model, train_loader, val_loader)
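The clean_dataset helper comes from earlier in the tutorial. If you don't have it handy, here is a minimal sketch of what it needs to produce: a Dataset that exposes its feature matrix as X (so input_size can be read from train_dataset.X.shape[1]) and targets scaled down by 10,000 to match the inference step later in this post. The exact cleaning steps and the median_house_value column name are assumptions, so adapt them to your copy of the data:

from torch.utils.data import Dataset

class HousingDataset(Dataset):
    # Hypothetical helper matching how this post uses the dataset:
    # exposes .X so input_size can be derived from X.shape[1]
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.float32)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

def clean_dataset(df):
    # Assumed cleaning: drop rows with missing values and one-hot
    # encode any categorical columns
    df = df.dropna()
    df = pd.get_dummies(df, dtype=float)
    # Scale the target down by 10,000 to match the inference step,
    # which multiplies predictions back up by 10,000
    y = df.pop('median_house_value').values / 10_000  # assumed target column
    return HousingDataset(df.values, y)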
# Extract a single example from the first batch
for batch in train_loader:
    # Get the inputs and ignore the targets for now
    example_input, _ = batch
    # Select the first example and keep the batch dimension
    example_input = example_input[0:1]
    # We only need the first batch, so break out of the loop
    break
# Save the example
torch.save(example_input, 'example_input.pt')
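A quick shape check confirms the saved example keeps its batch dimension; with the cleaned dataset used in this post it should report 11 features:

print(example_input.shape)  # expected: torch.Size([1, 11])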
# Configure Linux for Neuron repository updates
. /etc/os-release
sudo tee /etc/apt/sources.list.d/neuron.list > /dev/null <<EOF
deb https://apt.repos.neuron.amazonaws.com ${VERSION_CODENAME} main
EOF
wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | sudo apt-key add -
# Update OS packages
sudo apt-get update -y
# Install OS headers
sudo apt-get install linux-headers-$(uname -r) -y
# Install git
sudo apt-get install git -y
# Install Neuron Driver
sudo apt-get install aws-neuronx-dkms=2.* -y
# Install Neuron Runtime
sudo apt-get install aws-neuronx-collectives=2.* -y
sudo apt-get install aws-neuronx-runtime-lib=2.* -y
# Install Neuron Tools
sudo apt-get install aws-neuronx-tools=2.* -y
# Add PATH
export PATH=/opt/aws/neuron/bin:$PATH
# Set pip repository pointing to the Neuron repository
python -m pip config set global.extra-index-url https://pip.repos.neuron.amazonaws.com
# Install Neuron Compiler and Framework
python -m pip install neuronx-cc==2.* --pre torch-neuronx==2.1.* torchvision
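A quick way to confirm the packages landed in the right environment is to import them (a minimal check; it should complete without errors on the inf2 instance):

# Both imports should succeed if the installation worked
import torch
import torch_neuronx
print(torch.__version__)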
Note: The torch_neuronx.trace() function used below is analogous to torch.jit.trace().
import torch
import torch_neuronx

# Use the model definition from the training job.
# If you've been following the tutorial, the input size is 11.
model = HousingPriceModel(11)
# Load the checkpoint
checkpoint = torch.load('model.pt', map_location=torch.device('cpu'))
# Extract the state dictionary
model_state_dict = checkpoint['state_dict']
# Load the state dictionary into the model
model.load_state_dict(model_state_dict)
# Put the model in evaluation mode so Dropout is disabled during tracing
model.eval()
# Load the example input we exported in the previous steps
example_input = torch.load('example_input.pt')
# Compile the model
model_neuron = torch_neuronx.trace(model, example_input)
# Save the TorchScript model for inference
filename = 'model_neuron.pt'
torch.jit.save(model_neuron, filename)
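As an optional sanity check, you can compare the compiled model against the original on the same example before deploying it; the traced model accepts regular CPU tensors and executes on the NeuronCore:

# Compare the original and compiled models on the same input
with torch.no_grad():
    cpu_prediction = model(example_input)
neuron_prediction = model_neuron(example_input)
print(cpu_prediction.item(), neuron_prediction.item())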
import torch
import torch_xla.core.xla_model as xm

# Load the saved TorchScript model into memory
model_neuron = torch.jit.load(filename)
# Get an XLA device
device = xm.xla_device()
# Move the model to the XLA device
# (defaults to a NeuronCore on an inf2 instance)
model_neuron = model_neuron.to(device)

def invoke(example):
    # Move the input to the XLA device and make a prediction
    xla_example = example.to(device)
    prediction = model_neuron(xla_example)
    # Round the prediction to the nearest whole number and adjust it
    # back to the original scale by multiplying by 10,000
    price = round(prediction.item()) * 10000
    return {
        "house_value": price
    }

print(invoke(example_input))
Any opinions in this post are those of the individual author and may not reflect the opinions of AWS.