From 075a26603e40c8ba31ce21e0019c6714cf32413b Mon Sep 17 00:00:00 2001 From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com> Date: Tue, 10 Dec 2024 17:11:58 +0100 Subject: [PATCH] Update docs about inference endpoints (#432) * Delete type and rename model in endpoint docs * Explain to pass either model_name or endpoint_name+reuse_existing * Fix legacy instance type and size in docs * Minor fix --- ...valuate-the-model-on-a-server-or-container.mdx | 15 +++++++-------- examples/model_configs/endpoint_model.yaml | 8 +++++--- src/lighteval/models/endpoints/endpoint_model.py | 2 +- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/docs/source/evaluate-the-model-on-a-server-or-container.mdx b/docs/source/evaluate-the-model-on-a-server-or-container.mdx index 0d9a7d127..fff5f777c 100644 --- a/docs/source/evaluate-the-model-on-a-server-or-container.mdx +++ b/docs/source/evaluate-the-model-on-a-server-or-container.mdx @@ -26,22 +26,22 @@ __configuration file example:__ ```yaml model: - type: "endpoint" base_params: - endpoint_name: "llama-2-7B-lighteval" # needs to be lower case without special characters - model: "meta-llama/Llama-2-7b-hf" + # Pass either model_name, or endpoint_name together with reuse_existing: true + # endpoint_name: "llama-2-7b-lighteval" # needs to be lower case without special characters + # reuse_existing: true # defaults to false; if true, ignore all params in instance, and don't delete the endpoint after evaluation + model_name: "meta-llama/Llama-2-7b-hf" revision: "main" dtype: "float16" # can be any of "awq", "eetq", "gptq", "4bit' or "8bit" (will use bitsandbytes), "bfloat16" or "float16" - reuse_existing: false # if true, ignore all params in instance, and don't delete the endpoint after evaluation instance: accelerator: "gpu" region: "eu-west-1" vendor: "aws" - instance_size: "medium" - instance_type: "g5.2xlarge" + instance_type: "nvidia-a10g" + instance_size: "x1" framework: "pytorch" endpoint_type: 
"protected" - namespace: null # The namespace under which to launch the endopint. Defaults to the current user's namespace + namespace: null # The namespace under which to launch the endpoint. Defaults to the current user's namespace image_url: null # Optionally specify the docker image to use when launching the endpoint model. E.g., launching models with later releases of the TGI container with support for newer models. env_vars: null # Optional environment variables to include when launching the endpoint. e.g., `MAX_INPUT_LENGTH: 2048` @@ -58,7 +58,6 @@ __configuration file example:__ ```yaml model: - type: "tgi" instance: inference_server_address: "" inference_server_auth: null diff --git a/examples/model_configs/endpoint_model.yaml b/examples/model_configs/endpoint_model.yaml index 3cca5c431..79b7eff0b 100644 --- a/examples/model_configs/endpoint_model.yaml +++ b/examples/model_configs/endpoint_model.yaml @@ -1,15 +1,17 @@ model: base_params: - model_name: "meta-llama/Llama-2-7b-hf" # the model name or the endpoint name if reuse_existing is true + # Pass either model_name, or endpoint_name together with reuse_existing: true + # endpoint_name: "llama-2-7b-lighteval" # needs to be lower case without special characters + # reuse_existing: true # defaults to false; if true, ignore all params in instance, and don't delete the endpoint after evaluation + model_name: "meta-llama/Llama-2-7b-hf" revision: "main" dtype: "float16" # can be any of "awq", "eetq", "gptq", "4bit' or "8bit" (will use bitsandbytes), "bfloat16" or "float16" - reuse_existing: false # if true, ignore all params in instance, and don't delete the endpoint after evaluation instance: accelerator: "gpu" region: "eu-west-1" vendor: "aws" - instance_size: "x1" instance_type: "nvidia-a10g" + instance_size: "x1" framework: "pytorch" endpoint_type: "protected" namespace: null # The namespace under which to launch the endopint. 
Defaults to the current user's namespace diff --git a/src/lighteval/models/endpoints/endpoint_model.py b/src/lighteval/models/endpoints/endpoint_model.py index 112338964..3b08940fb 100644 --- a/src/lighteval/models/endpoints/endpoint_model.py +++ b/src/lighteval/models/endpoints/endpoint_model.py @@ -95,7 +95,7 @@ class InferenceEndpointModelConfig: endpoint_type: str = "protected" add_special_tokens: bool = True revision: str = "main" - namespace: str = None # The namespace under which to launch the endopint. Defaults to the current user's namespace + namespace: str = None # The namespace under which to launch the endpoint. Defaults to the current user's namespace image_url: str = None env_vars: dict = None