From f907a3430ac09647fb2bd08a16ef990762eb66c3 Mon Sep 17 00:00:00 2001
From: Albert Villanova del Moral <8515462+albertvillanova@users.noreply.github.com>
Date: Thu, 12 Dec 2024 09:37:21 +0100
Subject: [PATCH] Test inference endpoint model config parsing from path (#434)

* Add example model config for existing endpoint

* Test InferenceEndpointModelConfig.from_path

* Comment default main branch in example

* Fix typo

* Delete unused add_special_tokens param in endpoint example config

* Fix typo

* Implement InferenceEndpointModelConfig.from_path

* Use InferenceEndpointModelConfig.from_path

* Refactor InferenceEndpointModelConfig.from_path

* Align docs
---
 ...ate-the-model-on-a-server-or-container.mdx |  4 +-
 examples/model_configs/endpoint_model.yaml    |  6 +-
 .../endpoint_model_reuse_existing.yaml        |  5 ++
 src/lighteval/main_endpoint.py                | 25 +-----
 .../models/endpoints/endpoint_model.py        | 11 ++-
 tests/models/test_endpoint_model.py           | 85 +++++++++++++++++++
 6 files changed, 105 insertions(+), 31 deletions(-)
 create mode 100644 examples/model_configs/endpoint_model_reuse_existing.yaml
 create mode 100644 tests/models/test_endpoint_model.py

diff --git a/docs/source/evaluate-the-model-on-a-server-or-container.mdx b/docs/source/evaluate-the-model-on-a-server-or-container.mdx
index fff5f777c..23c658b4e 100644
--- a/docs/source/evaluate-the-model-on-a-server-or-container.mdx
+++ b/docs/source/evaluate-the-model-on-a-server-or-container.mdx
@@ -31,7 +31,7 @@ model:
     # endpoint_name: "llama-2-7B-lighteval" # needs to be lower case without special characters
     # reuse_existing: true # defaults to false; if true, ignore all params in instance, and don't delete the endpoint after evaluation
     model_name: "meta-llama/Llama-2-7b-hf"
-    revision: "main"
+    # revision: "main" # defaults to "main"
     dtype: "float16" # can be any of "awq", "eetq", "gptq", "4bit' or "8bit" (will use bitsandbytes), "bfloat16" or "float16"
   instance:
     accelerator: "gpu"
@@ -45,8 +45,6 @@ model:
     image_url: null # Optionally specify the docker image to use when launching the endpoint model. E.g., launching models with later releases of the TGI container with support for newer models.
     env_vars:
       null # Optional environment variables to include when launching the endpoint. e.g., `MAX_INPUT_LENGTH: 2048`
-  generation:
-    add_special_tokens: true
 ```

 ### Text Generation Inference (TGI)
diff --git a/examples/model_configs/endpoint_model.yaml b/examples/model_configs/endpoint_model.yaml
index 79b7eff0b..08cb5fac3 100644
--- a/examples/model_configs/endpoint_model.yaml
+++ b/examples/model_configs/endpoint_model.yaml
@@ -4,7 +4,7 @@ model:
     # endpoint_name: "llama-2-7B-lighteval" # needs to be lower case without special characters
     # reuse_existing: true # defaults to false; if true, ignore all params in instance, and don't delete the endpoint after evaluation
     model_name: "meta-llama/Llama-2-7b-hf"
-    revision: "main"
+    # revision: "main" # defaults to "main"
     dtype: "float16" # can be any of "awq", "eetq", "gptq", "4bit' or "8bit" (will use bitsandbytes), "bfloat16" or "float16"
   instance:
     accelerator: "gpu"
@@ -14,9 +14,7 @@ model:
     instance_size: "x1"
     framework: "pytorch"
     endpoint_type: "protected"
-    namespace: null # The namespace under which to launch the endopint. Defaults to the current user's namespace
+    namespace: null # The namespace under which to launch the endpoint. Defaults to the current user's namespace
     image_url: null # Optionally specify the docker image to use when launching the endpoint model. E.g., launching models with later releases of the TGI container with support for newer models.
     env_vars:
       null # Optional environment variables to include when launching the endpoint. e.g., `MAX_INPUT_LENGTH: 2048`
-  generation:
-    add_special_tokens: true
diff --git a/examples/model_configs/endpoint_model_reuse_existing.yaml b/examples/model_configs/endpoint_model_reuse_existing.yaml
new file mode 100644
index 000000000..8b47354d2
--- /dev/null
+++ b/examples/model_configs/endpoint_model_reuse_existing.yaml
@@ -0,0 +1,5 @@
+model:
+  base_params:
+    # Pass either model_name, or endpoint_name and true reuse_existing
+    endpoint_name: "llama-2-7B-lighteval" # needs to be lower case without special characters
+    reuse_existing: true # defaults to false; if true, ignore all params in instance, and don't delete the endpoint after evaluation
diff --git a/src/lighteval/main_endpoint.py b/src/lighteval/main_endpoint.py
index 4b31f0f2d..952aae074 100644
--- a/src/lighteval/main_endpoint.py
+++ b/src/lighteval/main_endpoint.py
@@ -198,7 +198,6 @@ def inference_endpoint(
     """
     Evaluate models using inference-endpoints as backend.
     """
-    import yaml
 
     from lighteval.logging.evaluation_tracker import EvaluationTracker
     from lighteval.models.endpoints.endpoint_model import (
@@ -220,31 +219,11 @@ def inference_endpoint(
 
     parallelism_manager = ParallelismManager.NONE  # since we're using inference endpoints in remote
 
-    with open(model_config_path, "r") as f:
-        config = yaml.safe_load(f)["model"]
-
     # Find a way to add this back
     # if config["base_params"].get("endpoint_name", None):
     #     return InferenceModelConfig(model=config["base_params"]["endpoint_name"])
-    all_params = {
-        "model_name": config["base_params"].get("model_name", None),
-        "endpoint_name": config["base_params"].get("endpoint_name", None),
-        "model_dtype": config["base_params"].get("dtype", None),
-        "revision": config["base_params"].get("revision", None) or "main",
-        "reuse_existing": config["base_params"].get("reuse_existing"),
-        "accelerator": config.get("instance", {}).get("accelerator", None),
-        "region": config.get("instance", {}).get("region", None),
-        "vendor": config.get("instance", {}).get("vendor", None),
-        "instance_size": config.get("instance", {}).get("instance_size", None),
-        "instance_type": config.get("instance", {}).get("instance_type", None),
-        "namespace": config.get("instance", {}).get("namespace", None),
-        "image_url": config.get("instance", {}).get("image_url", None),
-        "env_vars": config.get("instance", {}).get("env_vars", None),
-    }
-    model_config = InferenceEndpointModelConfig(
-        # We only initialize params which have a non default value
-        **{k: v for k, v in all_params.items() if v is not None},
-    )
+
+    model_config = InferenceEndpointModelConfig.from_path(model_config_path)
 
     pipeline_params = PipelineParameters(
         launcher_type=parallelism_manager,
diff --git a/src/lighteval/models/endpoints/endpoint_model.py b/src/lighteval/models/endpoints/endpoint_model.py
index 909d4795d..0bd6cbbc3 100644
--- a/src/lighteval/models/endpoints/endpoint_model.py
+++ b/src/lighteval/models/endpoints/endpoint_model.py
@@ -103,12 +103,21 @@ def __post_init__(self):
         # xor operator, one is None but not the other
         if (self.instance_size is None) ^ (self.instance_type is None):
             raise ValueError(
-                "When creating an inference endpoint, you need to specify explicitely both instance_type and instance_size, or none of them for autoscaling."
+ "When creating an inference endpoint, you need to specify explicitly both instance_type and instance_size, or none of them for autoscaling." ) if not (self.endpoint_name is None) ^ int(self.model_name is None): raise ValueError("You need to set either endpoint_name or model_name (but not both).") + @classmethod + def from_path(cls, path: str) -> "InferenceEndpointModelConfig": + import yaml + + with open(path, "r") as f: + config = yaml.safe_load(f)["model"] + config["base_params"]["model_dtype"] = config["base_params"].pop("dtype", None) + return cls(**config["base_params"], **config.get("instance", {})) + def get_dtype_args(self) -> Dict[str, str]: if self.model_dtype is None: return {} diff --git a/tests/models/test_endpoint_model.py b/tests/models/test_endpoint_model.py new file mode 100644 index 000000000..29fbb3c48 --- /dev/null +++ b/tests/models/test_endpoint_model.py @@ -0,0 +1,85 @@ +# MIT License + +# Copyright (c) 2024 The HuggingFace Team + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+
+import pytest
+
+from lighteval.models.endpoints.endpoint_model import InferenceEndpointModelConfig
+
+
+# "examples/model_configs/endpoint_model.yaml"
+
+
+class TestInferenceEndpointModelConfig:
+    @pytest.mark.parametrize(
+        "config_path, expected_config",
+        [
+            (
+                "examples/model_configs/endpoint_model.yaml",
+                {
+                    "model_name": "meta-llama/Llama-2-7b-hf",
+                    "revision": "main",
+                    "model_dtype": "float16",
+                    "endpoint_name": None,
+                    "reuse_existing": False,
+                    "accelerator": "gpu",
+                    "region": "eu-west-1",
+                    "vendor": "aws",
+                    "instance_type": "nvidia-a10g",
+                    "instance_size": "x1",
+                    "framework": "pytorch",
+                    "endpoint_type": "protected",
+                    "namespace": None,
+                    "image_url": None,
+                    "env_vars": None,
+                },
+            ),
+            (
+                "examples/model_configs/endpoint_model_lite.yaml",
+                {
+                    "model_name": "meta-llama/Llama-3.1-8B-Instruct",
+                    # Defaults:
+                    "revision": "main",
+                    "model_dtype": None,
+                    "endpoint_name": None,
+                    "reuse_existing": False,
+                    "accelerator": "gpu",
+                    "region": "us-east-1",
+                    "vendor": "aws",
+                    "instance_type": None,
+                    "instance_size": None,
+                    "framework": "pytorch",
+                    "endpoint_type": "protected",
+                    "namespace": None,
+                    "image_url": None,
+                    "env_vars": None,
+                },
+            ),
+            (
+                "examples/model_configs/endpoint_model_reuse_existing.yaml",
+                {"endpoint_name": "llama-2-7B-lighteval", "reuse_existing": True},
+            ),
+        ],
+    )
+    def test_from_path(self, config_path, expected_config):
+        config = InferenceEndpointModelConfig.from_path(config_path)
+        for key, value in expected_config.items():
+            assert getattr(config, key) == value
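
For reference, a minimal usage sketch of the new InferenceEndpointModelConfig.from_path helper (not part of the patch itself; the file paths and expected values are taken from the example configs and test expectations above, and it assumes it is run from the repository root where those example files exist):

    from lighteval.models.endpoints.endpoint_model import InferenceEndpointModelConfig

    # Parse the full example config; from_path remaps the YAML "dtype" key to "model_dtype".
    config = InferenceEndpointModelConfig.from_path("examples/model_configs/endpoint_model.yaml")
    assert config.model_name == "meta-llama/Llama-2-7b-hf"
    assert config.model_dtype == "float16"
    assert (config.instance_type, config.instance_size) == ("nvidia-a10g", "x1")

    # Parse the reuse-existing example; only base_params are set, so instance fields keep their defaults.
    reuse = InferenceEndpointModelConfig.from_path("examples/model_configs/endpoint_model_reuse_existing.yaml")
    assert reuse.endpoint_name == "llama-2-7B-lighteval"
    assert reuse.reuse_existing is True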