add configs with their models (#421)
* add configs with their models

* fix tests

* doc update

* doc update

* fix path
clefourrier authored Dec 10, 2024
1 parent 412ccfc commit 9315f0d
Showing 20 changed files with 342 additions and 378 deletions.
4 changes: 1 addition & 3 deletions docs/source/_toctree.yml
@@ -33,9 +33,7 @@
   - local: package_reference/evaluation_tracker
     title: EvaluationTracker
   - local: package_reference/models
-    title: Models
-  - local: package_reference/model_config
-    title: ModelConfig
+    title: Models and ModelConfigs
   - local: package_reference/pipeline
     title: Pipeline
   title: Main classes
10 changes: 0 additions & 10 deletions docs/source/package_reference/model_config.mdx

This file was deleted.

32 changes: 23 additions & 9 deletions docs/source/package_reference/models.mdx
@@ -4,24 +4,38 @@
 ### LightevalModel
 [[autodoc]] models.abstract_model.LightevalModel


 ## Accelerate and Transformers Models
 ### BaseModel
-[[autodoc]] models.base_model.BaseModel
+[[autodoc]] models.transformers.base_model.BaseModelConfig
+[[autodoc]] models.transformers.base_model.BaseModel

 ### AdapterModel
-[[autodoc]] models.adapter_model.AdapterModel
+[[autodoc]] models.transformers.adapter_model.AdapterModelConfig
+[[autodoc]] models.transformers.adapter_model.AdapterModel

 ### DeltaModel
-[[autodoc]] models.delta_model.DeltaModel
+[[autodoc]] models.transformers.delta_model.DeltaModelConfig
+[[autodoc]] models.transformers.delta_model.DeltaModel

-## Inference Endpoints and TGI Models
+## Endpoints-based Models
 ### InferenceEndpointModel
-[[autodoc]] models.endpoint_model.InferenceEndpointModel
-### ModelClient
-[[autodoc]] models.tgi_model.ModelClient
+[[autodoc]] models.endpoints.endpoint_model.InferenceEndpointModelConfig
+[[autodoc]] models.endpoints.endpoint_model.InferenceModelConfig
+[[autodoc]] models.endpoints.endpoint_model.InferenceEndpointModel
+
+### TGI ModelClient
+[[autodoc]] models.endpoints.tgi_model.TGIModelConfig
+[[autodoc]] models.endpoints.tgi_model.ModelClient
+
+### Open AI Models
+[[autodoc]] models.endpoints.openai_model.OpenAIClient

 ## Nanotron Model
 ### NanotronLightevalModel
-[[autodoc]] models.nanotron_model.NanotronLightevalModel
+[[autodoc]] models.nanotron.nanotron_model.NanotronLightevalModel

 ## VLLM Model
 ### VLLMModel
-[[autodoc]] models.vllm_model.VLLMModel
+[[autodoc]] models.vllm.vllm_model.VLLMModelConfig
+[[autodoc]] models.vllm.vllm_model.VLLMModel
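The documentation change above and the import updates in the files below amount to one migration: each model config now lives in the module of the model it configures. A minimal sketch of the before and after, using only paths that appear in this diff:

```python
# Before this commit, all configs came from a single module:
#   from lighteval.models.model_config import BaseModelConfig, DeltaModelConfig, VLLMModelConfig, ...
# After this commit, each config is imported from the module of its model:
from lighteval.models.transformers.adapter_model import AdapterModelConfig
from lighteval.models.transformers.base_model import BaseModelConfig, BitsAndBytesConfig
from lighteval.models.transformers.delta_model import DeltaModelConfig
from lighteval.models.endpoints.endpoint_model import InferenceEndpointModelConfig, InferenceModelConfig
from lighteval.models.endpoints.tgi_model import TGIModelConfig
from lighteval.models.vllm.vllm_model import VLLMModelConfig
```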
4 changes: 3 additions & 1 deletion src/lighteval/main_accelerate.py
@@ -107,7 +107,9 @@ def accelerate( # noqa C901
     from accelerate import Accelerator, InitProcessGroupKwargs

     from lighteval.logging.evaluation_tracker import EvaluationTracker
-    from lighteval.models.model_config import AdapterModelConfig, BaseModelConfig, BitsAndBytesConfig, DeltaModelConfig
+    from lighteval.models.transformers.adapter_model import AdapterModelConfig
+    from lighteval.models.transformers.base_model import BaseModelConfig, BitsAndBytesConfig
+    from lighteval.models.transformers.delta_model import DeltaModelConfig
     from lighteval.pipeline import EnvConfig, ParallelismManager, Pipeline, PipelineParameters

     accelerator = Accelerator(kwargs_handlers=[InitProcessGroupKwargs(timeout=timedelta(seconds=3000))])
2 changes: 1 addition & 1 deletion src/lighteval/main_endpoint.py
@@ -201,7 +201,7 @@ def inference_endpoint(
     import yaml

     from lighteval.logging.evaluation_tracker import EvaluationTracker
-    from lighteval.models.model_config import (
+    from lighteval.models.endpoints.endpoint_model import (
         InferenceEndpointModelConfig,
     )
     from lighteval.pipeline import EnvConfig, ParallelismManager, Pipeline, PipelineParameters
2 changes: 1 addition & 1 deletion src/lighteval/main_vllm.py
@@ -89,7 +89,7 @@ def vllm(
     Evaluate models using vllm as backend.
     """
     from lighteval.logging.evaluation_tracker import EvaluationTracker
-    from lighteval.models.model_config import VLLMModelConfig
+    from lighteval.models.vllm.vllm_model import VLLMModelConfig
     from lighteval.pipeline import EnvConfig, ParallelismManager, Pipeline, PipelineParameters

     TOKEN = os.getenv("HF_TOKEN")
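For a fuller picture, a hedged sketch of driving an evaluation through the Python API with the relocated VLLMModelConfig. Only the import paths come from this diff; the field and parameter names (pretrained, output_dir, launcher_type, the task string) and the Pipeline call are assumptions to check against the lighteval source.

```python
# A minimal sketch, not the verbatim main_vllm.py flow; names marked "assumed" are not in this diff.
from lighteval.logging.evaluation_tracker import EvaluationTracker
from lighteval.models.vllm.vllm_model import VLLMModelConfig
from lighteval.pipeline import ParallelismManager, Pipeline, PipelineParameters

evaluation_tracker = EvaluationTracker(output_dir="./results")                # assumed parameter name
pipeline_params = PipelineParameters(launcher_type=ParallelismManager.VLLM)   # assumed parameter name
model_config = VLLMModelConfig(pretrained="HuggingFaceH4/zephyr-7b-beta")     # assumed field name

pipeline = Pipeline(
    tasks="leaderboard|arc:challenge|0|0",  # assumed task-string format
    pipeline_parameters=pipeline_params,
    evaluation_tracker=evaluation_tracker,
    model_config=model_config,
)
pipeline.evaluate()
pipeline.show_results()  # assumed helper
```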
@@ -23,12 +23,12 @@
 # inspired by https://github.com/EleutherAI/lm-evaluation-harness/blob/main/lm_eval/models/dummy.py

 import random
+from dataclasses import dataclass
 from typing import Optional

 from transformers import AutoTokenizer

 from lighteval.models.abstract_model import LightevalModel, ModelInfo
-from lighteval.models.model_config import DummyModelConfig
 from lighteval.models.model_output import GenerativeResponse, LoglikelihoodResponse, LoglikelihoodSingleTokenResponse
 from lighteval.tasks.requests import (
     GreedyUntilRequest,
@@ -39,6 +39,11 @@
 from lighteval.utils.utils import EnvConfig


+@dataclass
+class DummyModelConfig:
+    seed: int = 42
+
+
 class DummyModel(LightevalModel):
     """Dummy model to generate random baselines."""

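A tiny usage sketch for the new inline DummyModelConfig; only the dataclass itself appears in the hunk above, and the import path is an assumption since this commit reorganizes the model modules.

```python
# Hedged sketch: DummyModelConfig(seed=...) is taken from the diff above; the module path is assumed.
from lighteval.models.dummy_model import DummyModelConfig  # assumed path

config = DummyModelConfig(seed=1234)  # defaults to seed=42 when omitted
```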
@@ -24,7 +24,8 @@
 import logging
 import re
 import time
-from typing import Coroutine, List, Optional, Union
+from dataclasses import dataclass
+from typing import Coroutine, Dict, List, Optional, Union

 import requests
 import torch
@@ -47,7 +48,6 @@

 from lighteval.data import GenerativeTaskDataset, LoglikelihoodDataset
 from lighteval.models.abstract_model import LightevalModel, ModelInfo
-from lighteval.models.model_config import InferenceEndpointModelConfig, InferenceModelConfig
 from lighteval.models.model_output import GenerativeResponse, LoglikelihoodResponse, LoglikelihoodSingleTokenResponse
 from lighteval.tasks.requests import (
     GreedyUntilRequest,
@@ -74,6 +74,59 @@
 ]


+@dataclass
+class InferenceModelConfig:
+    model: str
+    add_special_tokens: bool = True
+
+
+@dataclass
+class InferenceEndpointModelConfig:
+    endpoint_name: str = None
+    model_name: str = None
+    should_reuse_existing: bool = False
+    accelerator: str = "gpu"
+    model_dtype: str = None  # if empty, we use the default
+    vendor: str = "aws"
+    region: str = "us-east-1"  # this region has the most hardware options available
+    instance_size: str = None  # if none, we autoscale
+    instance_type: str = None  # if none, we autoscale
+    framework: str = "pytorch"
+    endpoint_type: str = "protected"
+    add_special_tokens: bool = True
+    revision: str = "main"
+    namespace: str = None  # The namespace under which to launch the endpoint. Defaults to the current user's namespace
+    image_url: str = None
+    env_vars: dict = None
+
+    def __post_init__(self):
+        # xor operator, one is None but not the other
+        if (self.instance_size is None) ^ (self.instance_type is None):
+            raise ValueError(
+                "When creating an inference endpoint, you need to specify explicitly both instance_type and instance_size, or none of them for autoscaling."
+            )
+
+        if not (self.endpoint_name is None) ^ int(self.model_name is None):
+            raise ValueError("You need to set either endpoint_name or model_name (but not both).")
+
+    def get_dtype_args(self) -> Dict[str, str]:
+        if self.model_dtype is None:
+            return {}
+        model_dtype = self.model_dtype.lower()
+        if model_dtype in ["awq", "eetq", "gptq"]:
+            return {"QUANTIZE": model_dtype}
+        if model_dtype == "8bit":
+            return {"QUANTIZE": "bitsandbytes"}
+        if model_dtype == "4bit":
+            return {"QUANTIZE": "bitsandbytes-nf4"}
+        if model_dtype in ["bfloat16", "float16"]:
+            return {"DTYPE": model_dtype}
+        return {}
+
+    def get_custom_env_vars(self) -> Dict[str, str]:
+        return {k: str(v) for k, v in self.env_vars.items()} if self.env_vars else {}
+
+
 class InferenceEndpointModel(LightevalModel):
     """InferenceEndpointModels can be used both with the free inference client, or with inference
     endpoints, which will use text-generation-inference to deploy your model for the duration of the evaluation.
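A short behavior sketch for the relocated InferenceEndpointModelConfig, based only on the dataclass shown above; the model name is a placeholder.

```python
# Uses only what the dataclass above defines; "my-org/my-model" is a placeholder repo id.
from lighteval.models.endpoints.endpoint_model import InferenceEndpointModelConfig

config = InferenceEndpointModelConfig(
    model_name="my-org/my-model",  # endpoint_name must then stay unset (xor check in __post_init__)
    model_dtype="4bit",
)
print(config.get_dtype_args())       # {'QUANTIZE': 'bitsandbytes-nf4'}
print(config.get_custom_env_vars())  # {} since env_vars was not provided

# Setting only one of instance_size/instance_type raises ValueError in __post_init__,
# as does setting both endpoint_name and model_name, or neither.
```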
@@ -24,13 +24,14 @@
 import os
 import time
 from concurrent.futures import ThreadPoolExecutor
+from dataclasses import dataclass
 from typing import Optional

 from tqdm import tqdm

 from lighteval.data import GenerativeTaskDataset, LoglikelihoodDataset
 from lighteval.models.abstract_model import LightevalModel
-from lighteval.models.endpoint_model import ModelInfo
+from lighteval.models.endpoints.endpoint_model import ModelInfo
 from lighteval.models.model_output import (
     GenerativeResponse,
     LoglikelihoodResponse,
@@ -58,6 +59,11 @@
 logging.getLogger("httpx").setLevel(logging.ERROR)


+@dataclass
+class OpenAIModelConfig:
+    model: str
+
+
 class OpenAIClient(LightevalModel):
     _DEFAULT_MAX_LENGTH: int = 4096

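For completeness, a minimal sketch for the new OpenAIModelConfig; the module path matches the docs diff above, while the model id is a placeholder and how OpenAIClient consumes the config is not shown here.

```python
# Hedged sketch: only the one-field dataclass appears in the diff above.
from lighteval.models.endpoints.openai_model import OpenAIModelConfig

config = OpenAIModelConfig(model="gpt-4o-mini")  # placeholder model name
print(config.model)
```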
@@ -21,13 +21,14 @@
 # SOFTWARE.

 import asyncio
+from dataclasses import dataclass
 from typing import Coroutine, Optional

 import requests
 from huggingface_hub import TextGenerationInputGrammarType, TextGenerationOutput
 from transformers import AutoTokenizer

-from lighteval.models.endpoint_model import InferenceEndpointModel, ModelInfo
+from lighteval.models.endpoints.endpoint_model import InferenceEndpointModel, ModelInfo
 from lighteval.utils.imports import NO_TGI_ERROR_MSG, is_tgi_available


@@ -44,6 +45,13 @@ def divide_chunks(array, n):
         yield array[i : i + n]


+@dataclass
+class TGIModelConfig:
+    inference_server_address: str
+    inference_server_auth: str
+    model_id: str
+
+
 # inherit from InferenceEndpointModel instead of LightevalModel since they both use the same interface, and only overwrite
 # the client functions, since they use a different client.
 class ModelClient(InferenceEndpointModel):
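And a hedged sketch of filling in the new TGIModelConfig for a locally running text-generation-inference server; the field names come from the dataclass above, while the server address and model id are placeholders, and how ModelClient consumes these values is not shown in this diff.

```python
from lighteval.models.endpoints.tgi_model import TGIModelConfig  # module path as in the docs diff above

config = TGIModelConfig(
    inference_server_address="http://localhost:8080",  # placeholder local TGI endpoint
    inference_server_auth=None,                        # no auth token for a local server (assumption)
    model_id="my-org/my-model",                        # placeholder repo id
)
```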