SetFit Deployment on Amazon SageMaker #537

Open
Overfitter opened this issue Jun 28, 2024 · 1 comment
Overfitter commented Jun 28, 2024

Can someone please provide a guide on how to deploy a SetFit model on Amazon SageMaker? I don't think Hugging Face supports this out of the box. I have tried the following script, but it's not working.

Note: the SetFit model is in .safetensors format, created with setfit==1.0.3 and transformers==4.39.0.
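
For reference, my understanding of the expected model.tar.gz layout (an assumption based on the SageMaker Hugging Face Inference Toolkit conventions; the file names below are placeholders for whatever SetFitModel.save_pretrained produced):

import tarfile

# Hypothetical local layout before packaging:
#
#   model/
#   ├── model_head.pkl        # SetFit classification head
#   ├── model.safetensors     # sentence-transformer body
#   ├── config_setfit.json
#   ├── ...                   # tokenizer / sentence-transformers config files
#   └── code/
#       ├── inference.py
#       └── requirements.txt  # e.g. setfit==1.0.3
#
with tarfile.open("model.tar.gz", "w:gz") as tar:
    tar.add("model/", arcname=".")  # artifacts must sit at the tarball root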

from sagemaker.huggingface.model import HuggingFaceModel

# create Hugging Face Model class
huggingface_model = HuggingFaceModel(
    model_data="s3://path/model.tar.gz",  # path to your trained SageMaker model
    role=role,                            # IAM role with permissions to create an endpoint
    transformers_version="4.37",          # Transformers version used
    pytorch_version="2.1",                # PyTorch version used
    py_version="py310",                   # Python version used
    # transformers_version="4.26",
    # pytorch_version="1.13",
    # py_version="py39",
    entry_point="model/code/inference.py",
)

# deploy model to SageMaker Inference
predictor = huggingface_model.deploy(
    endpoint_name=endpoint_name,
    initial_instance_count=1,
    instance_type="ml.g4dn.xlarge",
)
import json

data = {
    "inputs": ["Camera - You are awarded a SiPix Digital Camera! call 09061221066 fromm landline. Delivery within 28 days."]
}

# request
predictor.predict(data)
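
As a quick sanity check before deploying, the same artifacts can be loaded locally with setfit to rule out a broken checkpoint (a minimal sketch; "model/" is the hypothetical unpacked directory from the tarball above):

from setfit import SetFitModel

model = SetFitModel.from_pretrained("model/")  # hypothetical local path
print(model.predict(["You are awarded a SiPix Digital Camera!"]))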

Here is the inference script:

import subprocess
import sys

def manage_packages(transformers_version="4.39.0", setfit_version="1.0.3"):
    """
    Uninstall existing transformers package and install specified versions of transformers and setfit.

    Args:
        transformers_version (str): The version of the transformers package to install.
        setfit_version (str): The version of the setfit package to install.
    """
    try:
        # Uninstall existing transformers package
        subprocess.check_call([sys.executable, "-m", "pip", "uninstall", "-y", "transformers"])
        print("Transformers uninstalled successfully.")

        # Install specified version of setfit package
        subprocess.check_call([sys.executable, "-m", "pip", "install", f"setfit=={setfit_version}"])
        print(f"SetFit {setfit_version} installed successfully.")

        # Install specified version of transformers package
        subprocess.check_call([sys.executable, "-m", "pip", "install", f"transformers=={transformers_version}"])
        print(f"Transformers {transformers_version} installed successfully.")

    except subprocess.CalledProcessError as e:
        print(f"Error during package management: {e}")
        sys.exit(1)

manage_packages()
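
# NOTE (assumption, not verified here): the SageMaker Hugging Face Inference
# Toolkit installs packages listed in code/requirements.txt from model.tar.gz
# at container startup, so pinning setfit==1.0.3 there may be a cleaner
# alternative to calling pip at import time as above.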

import ast
import torch
from sagemaker_inference import encoder, decoder
from setfit import SetFitModel


def model_fn(model_dir):
    model = SetFitModel.from_pretrained(model_dir)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    print(f"model loaded successfully {model}")
    return model


def input_fn(input_data, content_type):
    """A default input_fn that can handle JSON, CSV and NPZ formats.

    Args:
        input_data: the request payload serialized in the content_type format
        content_type: the request content_type

    Returns: input_data deserialized into the expected format. Currently expected
        format is {"inputs": ["q1", "q2", ...]}
    """
    decoded = None
    try:
        print(f"input_data: {input_data}, content_type: {content_type}")
        decoded = decoder.decode(input_data, content_type)
        print(f"decoded input: {decoded}, content_type: {content_type}")
        return ast.literal_eval(str(decoded))
    except Exception as e:
        print(f"invalid input. input: {decoded}, error: {e}")
        raise e


def output_fn(prediction, accept):
    """A default output_fn for PyTorch. Serializes predictions from predict_fn to JSON, CSV or NPY format.

    Args:
        prediction: a prediction result from predict_fn
        accept: type which the output data needs to be serialized

    Returns: output data serialized
    """
    print(f"prediction: {prediction}, prediction type: {type(prediction)}, accept: {accept}")
    encoded = encoder.encode(prediction, accept)
    print(f"encoded output: {encoded}, content_type: {accept}")
    return encoded


def predict_fn(data, model):
    """A default predict_fn for PyTorch. Calls a model on data deserialized in input_fn.
    Runs prediction on GPU if cuda is available.

    Args:
        data: input data for prediction deserialized by input_fn
        model: PyTorch model loaded in memory by model_fn

    Returns: a prediction
    """
    try:
        print(f"data: {data}, data_type: {type(data)}")
        inputs = data.get("inputs", None)
        if inputs is None:
            raise Exception(f"\"inputs\" not found: {data}")
        return model.predict(inputs)
    except Exception as e:
        print(f"predict_fn error: {e}")
        raise e
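
For debugging, the endpoint can also be invoked directly with boto3 to rule out client-side serialization issues (a minimal sketch; endpoint_name is the same value used at deploy time):

import json

import boto3

runtime = boto3.client("sagemaker-runtime")
response = runtime.invoke_endpoint(
    EndpointName=endpoint_name,  # same endpoint as above
    ContentType="application/json",
    Body=json.dumps({"inputs": ["You are awarded a SiPix Digital Camera!"]}),
)
print(response["Body"].read().decode("utf-8"))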