You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Can someone please provide a guide on how to deploy a SetFit model on Amazon SageMaker? I don't think the Hugging Face inference toolkit supports this out of the box. I have tried the following script, but it isn't working.
Note: The SetFit model is in .safetensors format, which I created using setfit==1.0.3 and transformers==4.39.0.
# Deploy a SetFit model packaged as model.tar.gz to a real-time SageMaker endpoint.
from sagemaker.huggingface.model import HuggingFaceModel

# Create the Hugging Face Model object.
# NOTE(review): transformers_version / pytorch_version / py_version must match a
# published HuggingFace Deep Learning Container image; the custom inference.py
# then re-installs the transformers/setfit versions the model actually needs at
# container startup — confirm the chosen DLC exists for this combination.
huggingface_model = HuggingFaceModel(
    model_data="s3://path/model.tar.gz",    # S3 path to the packaged model artifact
    role=role,                              # IAM role with permissions to create an endpoint
    transformers_version="4.37",            # Transformers version of the DLC image
    pytorch_version="2.1",                  # PyTorch version of the DLC image
    py_version="py310",                     # Python version of the DLC image
    entry_point="model/code/inference.py",  # custom inference handler shipped with the model
)

# Deploy the model behind a single-GPU real-time endpoint.
predictor = huggingface_model.deploy(
    endpoint_name=endpoint_name,
    initial_instance_count=1,
    instance_type="ml.g4dn.xlarge",
)

import json

# Payload shape expected by the custom input_fn: {"inputs": [text, ...]}.
data = {
    "inputs": ["Camera - You are awarded a SiPix Digital Camera! call 09061221066 fromm landline. Delivery within 28 days."]
}

# Invoke the endpoint.
predictor.predict(data)
Here is the inference script
import subprocess
import sys
def manage_packages(transformers_version="4.39.0", setfit_version="1.0.3"):
    """Replace the container's transformers install and add setfit.

    Runs at container startup (before the handler imports setfit) so the
    inference image ends up with the exact library versions the model was
    trained with.

    Args:
        transformers_version (str): The version of the transformers package to install.
        setfit_version (str): The version of the setfit package to install.
    """
    try:
        # Remove the DLC's pre-installed transformers first.
        subprocess.check_call([sys.executable, "-m", "pip", "uninstall", "-y", "transformers"])
        print("Transformers uninstalled successfully.")
        # Install BOTH packages in a single pip invocation so the resolver
        # picks mutually compatible versions. Two separate installs (the
        # original approach) let the second install silently downgrade or
        # replace dependencies pinned by the first.
        subprocess.check_call([
            sys.executable, "-m", "pip", "install",
            f"setfit=={setfit_version}",
            f"transformers=={transformers_version}",
        ])
        print(f"SetFit {setfit_version} and Transformers {transformers_version} installed successfully.")
    except subprocess.CalledProcessError as e:
        # Fail the container start loudly — a partial environment would only
        # produce confusing import errors later.
        print(f"Error during package management: {e}")
        sys.exit(1)


manage_packages()
import ast
import torch
from sagemaker_inference import encoder, decoder
from setfit import SetFitModel
def model_fn(model_dir):
    """Load the SetFit model from *model_dir*, placing it on GPU when one is available."""
    loaded = SetFitModel.from_pretrained(model_dir)
    target = "cuda" if torch.cuda.is_available() else "cpu"
    loaded.to(torch.device(target))
    print(f"model loaded successfully {loaded}")
    return loaded
def input_fn(input_data, content_type):
    """Deserialize the request payload (JSON, CSV or NPZ via the toolkit decoder).

    Args:
        input_data: the request payload serialized in the content_type format
        content_type: the request content_type

    Returns:
        The deserialized payload; the expected shape is
        {"inputs": ["q1", "q2", ...]}.

    Raises:
        Exception: re-raised from the decoder / parsing step on malformed input.
    """
    decoded = None
    try:
        print(f"input_data: {input_data}, content_type: {content_type}")
        decoded = decoder.decode(input_data, content_type)
        print(f"decoded input: {decoded}, content_type: {content_type}")
        # decoder.decode already returns a parsed Python object for JSON
        # payloads — return it as-is. The original str()/literal_eval
        # round-trip is redundant for dicts/lists and breaks on objects whose
        # repr is not a valid Python literal (e.g. numpy arrays from NPY).
        if isinstance(decoded, (dict, list)):
            return decoded
        return ast.literal_eval(str(decoded))
    except Exception as e:
        print(f"invalid input. input: {decoded}, error: {e}")
        raise e
def output_fn(prediction, accept):
    """Serialize a prediction from predict_fn into the *accept* format (JSON, CSV or NPY).

    Args:
        prediction: a prediction result from predict_fn
        accept: type which the output data needs to be serialized

    Returns: output data serialized
    """
    print(f"prediction: {prediction}, prediction type: {type(prediction)}, accept: {accept}")
    serialized = encoder.encode(prediction, accept)
    print(f"encoded output: {serialized}, content_type: {accept}")
    return serialized
def predict_fn(data, model):
    """Run SetFit inference on the payload produced by input_fn.

    Args:
        data: deserialized payload; must be a mapping holding a list of
            texts under the "inputs" key.
        model: SetFit model returned by model_fn.

    Returns:
        The result of model.predict(data["inputs"]).

    Raises:
        Exception: if data has no usable "inputs" entry.
    """
    try:
        print(f"data: {data}, data_type: {type(data)}")
        # Guard against non-dict payloads (e.g. a bare list): the original
        # data.get(...) raised AttributeError before the intended error
        # message could be produced.
        inputs = data.get("inputs", None) if isinstance(data, dict) else None
        if inputs is None:
            raise Exception(f"\"inputs\" not found: {data}")
        return model.predict(inputs)
    except Exception as e:
        print(f"predict_fn error: {e}")
        # Bare raise preserves the original traceback.
        raise
The text was updated successfully, but these errors were encountered:
Can someone please provide a guide on how to deploy a SetFit model on Amazon SageMaker? I don't think the Hugging Face inference toolkit supports this out of the box. I have tried the following script, but it isn't working.
Note: The SetFit model is in .safetensors format, which I created using setfit==1.0.3 and transformers==4.39.0.
Here is the inference script
The text was updated successfully, but these errors were encountered: