Running Spot Instances
1. Save Checkpoints
import torch
def save_checkpoint(state, is_best, filename):
    """Write ``state`` to ``filename`` only when it is a new best.

    Args:
        state: Object to serialize; passed unchanged to ``torch.save``.
        is_best: When truthy the checkpoint is written; otherwise nothing
            is written and only a message is printed.
        filename: Destination passed unchanged to ``torch.save``.
    """
    if is_best:
        print("=> Saving a new best")
        torch.save(state, filename)
    else:
        # Leave any previously saved checkpoint untouched.
        print("=> Validation Accuracy did not improve")
for epoch in range(epochs):
train(...)
test_accuracy =
test_accuracy = torch.FloatTensor([test_accuracy])
is_best = bool(test_accuracy.numpy() > best_accuracy.numpy())
best_accuracy = torch.FloatTensor(
max(test_accuracy.numpy(), best_accuracy.numpy()))
save_checkpoint({
'epoch': start_epoch + epoch + 1,
'state_dict': model.state_dict(),
'best_accuracy': best_accuracy,
}, is_best, checkpoint_file_path)2. Load Checkpoints
3. Use the spot instance option

Last updated