add configs with their models (#421)
* add configs with their models

* fix tests

* doc update

* doc update

* fix path
clefourrier authored Dec 10, 2024
1 parent 412ccfc commit 9315f0d
Showing 20 changed files with 342 additions and 378 deletions.
4 changes: 1 addition & 3 deletions docs/source/_toctree.yml
@@ -33,9 +33,7 @@
   - local: package_reference/evaluation_tracker
     title: EvaluationTracker
   - local: package_reference/models
-    title: Models
-  - local: package_reference/model_config
-    title: ModelConfig
+    title: Models and ModelConfigs
   - local: package_reference/pipeline
     title: Pipeline
   title: Main classes
10 changes: 0 additions & 10 deletions docs/source/package_reference/model_config.mdx

This file was deleted.

32 changes: 23 additions & 9 deletions docs/source/package_reference/models.mdx
@@ -4,24 +4,38 @@
 ### LightevalModel
 [[autodoc]] models.abstract_model.LightevalModel


 ## Accelerate and Transformers Models
 ### BaseModel
-[[autodoc]] models.base_model.BaseModel
+[[autodoc]] models.transformers.base_model.BaseModelConfig
+[[autodoc]] models.transformers.base_model.BaseModel

 ### AdapterModel
-[[autodoc]] models.adapter_model.AdapterModel
+[[autodoc]] models.transformers.adapter_model.AdapterModelConfig
+[[autodoc]] models.transformers.adapter_model.AdapterModel

 ### DeltaModel
-[[autodoc]] models.delta_model.DeltaModel
+[[autodoc]] models.transformers.delta_model.DeltaModelConfig
+[[autodoc]] models.transformers.delta_model.DeltaModel

-## Inference Endpoints and TGI Models
+## Endpoints-based Models
 ### InferenceEndpointModel
-[[autodoc]] models.endpoint_model.InferenceEndpointModel
-### ModelClient
-[[autodoc]] models.tgi_model.ModelClient
+[[autodoc]] models.endpoints.endpoint_model.InferenceEndpointModelConfig
+[[autodoc]] models.endpoints.endpoint_model.InferenceModelConfig
+[[autodoc]] models.endpoints.endpoint_model.InferenceEndpointModel
+
+### TGI ModelClient
+[[autodoc]] models.endpoints.tgi_model.TGIModelConfig
+[[autodoc]] models.endpoints.tgi_model.ModelClient
+
+### Open AI Models
+[[autodoc]] models.endpoints.openai_model.OpenAIClient

 ## Nanotron Model
 ### NanotronLightevalModel
-[[autodoc]] models.nanotron_model.NanotronLightevalModel
+[[autodoc]] models.nanotron.nanotron_model.NanotronLightevalModel

 ## VLLM Model
 ### VLLMModel
-[[autodoc]] models.vllm_model.VLLMModel
+[[autodoc]] models.vllm.vllm_model.VLLMModelConfig
+[[autodoc]] models.vllm.vllm_model.VLLMModel
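The documentation change above and the import updates in the files below amount to one migration: each model config now lives in the module of the model it configures. A minimal sketch of the before and after, using only paths that appear in this diff:

```python
# Before this commit, all configs came from a single module:
#   from lighteval.models.model_config import BaseModelConfig, DeltaModelConfig, VLLMModelConfig, ...
# After this commit, each config is imported from the module of its model:
from lighteval.models.transformers.adapter_model import AdapterModelConfig
from lighteval.models.transformers.base_model import BaseModelConfig, BitsAndBytesConfig
from lighteval.models.transformers.delta_model import DeltaModelConfig
from lighteval.models.endpoints.endpoint_model import InferenceEndpointModelConfig, InferenceModelConfig
from lighteval.models.endpoints.tgi_model import TGIModelConfig
from lighteval.models.vllm.vllm_model import VLLMModelConfig
```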
4 changes: 3 additions & 1 deletion src/lighteval/main_accelerate.py
@@ -107,7 +107,9 @@ def accelerate( # noqa C901
     from accelerate import Accelerator, InitProcessGroupKwargs

     from lighteval.logging.evaluation_tracker import EvaluationTracker
-    from lighteval.models.model_config import AdapterModelConfig, BaseModelConfig, BitsAndBytesConfig, DeltaModelConfig
+    from lighteval.models.transformers.adapter_model import AdapterModelConfig
+    from lighteval.models.transformers.base_model import BaseModelConfig, BitsAndBytesConfig
+    from lighteval.models.transformers.delta_model import DeltaModelConfig
     from lighteval.pipeline import EnvConfig, ParallelismManager, Pipeline, PipelineParameters

     accelerator = Accelerator(kwargs_handlers=[InitProcessGroupKwargs(timeout=timedelta(seconds=3000))])
2 changes: 1 addition & 1 deletion src/lighteval/main_endpoint.py
@@ -201,7 +201,7 @@ def inference_endpoint(
     import yaml

     from lighteval.logging.evaluation_tracker import EvaluationTracker
-    from lighteval.models.model_config import (
+    from lighteval.models.endpoints.endpoint_model import (
         InferenceEndpointModelConfig,
     )
     from lighteval.pipeline import EnvConfig, ParallelismManager, Pipeline, PipelineParameters
2 changes: 1 addition & 1 deletion src/lighteval/main_vllm.py
@@ -89,7 +89,7 @@ def vllm(
     Evaluate models using vllm as backend.
     """
     from lighteval.logging.evaluation_tracker import EvaluationTracker
-    from lighteval.models.model_config import VLLMModelConfig
+    from lighteval.models.vllm.vllm_model import VLLMModelConfig
     from lighteval.pipeline import EnvConfig, ParallelismManager, Pipeline, PipelineParameters

     TOKEN = os.getenv("HF_TOKEN")
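For a fuller picture, a hedged sketch of driving an evaluation through the Python API with the relocated VLLMModelConfig. Only the import paths come from this diff; the field and parameter names (pretrained, output_dir, launcher_type, the task string) and the Pipeline call are assumptions to check against the lighteval source.

```python
# A minimal sketch, not the verbatim main_vllm.py flow; names marked "assumed" are not in this diff.
from lighteval.logging.evaluation_tracker import EvaluationTracker
from lighteval.models.vllm.vllm_model import VLLMModelConfig
from lighteval.pipeline import ParallelismManager, Pipeline, PipelineParameters

evaluation_tracker = EvaluationTracker(output_dir="./results")                # assumed parameter name
pipeline_params = PipelineParameters(launcher_type=ParallelismManager.VLLM)   # assumed parameter name
model_config = VLLMModelConfig(pretrained="HuggingFaceH4/zephyr-7b-beta")     # assumed field name

pipeline = Pipeline(
    tasks="leaderboard|arc:challenge|0|0",  # assumed task-string format
    pipeline_parameters=pipeline_params,
    evaluation_tracker=evaluation_tracker,
    model_config=model_config,
)
pipeline.evaluate()
pipeline.show_results()  # assumed helper
```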
@@ -23,12 +23,12 @@
 # inspired by https://github.com/EleutherAI/lm-evaluation-harness/blob/main/lm_eval/models/dummy.py

 import random
+from dataclasses import dataclass
 from typing import Optional

 from transformers import AutoTokenizer

 from lighteval.models.abstract_model import LightevalModel, ModelInfo
-from lighteval.models.model_config import DummyModelConfig
 from lighteval.models.model_output import GenerativeResponse, LoglikelihoodResponse, LoglikelihoodSingleTokenResponse
 from lighteval.tasks.requests import (
     GreedyUntilRequest,
@@ -39,6 +39,11 @@
 from lighteval.utils.utils import EnvConfig


+@dataclass
+class DummyModelConfig:
+    seed: int = 42
+
+
 class DummyModel(LightevalModel):
     """Dummy model to generate random baselines."""

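A tiny usage sketch for the new inline DummyModelConfig; only the dataclass itself appears in the hunk above, and the import path is an assumption since this commit reorganizes the model modules.

```python
# Hedged sketch: DummyModelConfig(seed=...) is taken from the diff above; the module path is assumed.
from lighteval.models.dummy_model import DummyModelConfig  # assumed path

config = DummyModelConfig(seed=1234)  # defaults to seed=42 when omitted
```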
@@ -24,7 +24,8 @@
 import logging
 import re
 import time
-from typing import Coroutine, List, Optional, Union
+from dataclasses import dataclass
+from typing import Coroutine, Dict, List, Optional, Union

 import requests
 import torch
@@ -47,7 +48,6 @@

 from lighteval.data import GenerativeTaskDataset, LoglikelihoodDataset
 from lighteval.models.abstract_model import LightevalModel, ModelInfo
-from lighteval.models.model_config import InferenceEndpointModelConfig, InferenceModelConfig
 from lighteval.models.model_output import GenerativeResponse, LoglikelihoodResponse, LoglikelihoodSingleTokenResponse
 from lighteval.tasks.requests import (
     GreedyUntilRequest,
@@ -74,6 +74,59 @@
 ]


+@dataclass
+class InferenceModelConfig:
+    model: str
+    add_special_tokens: bool = True
+
+
+@dataclass
+class InferenceEndpointModelConfig:
+    endpoint_name: str = None
+    model_name: str = None
+    should_reuse_existing: bool = False
+    accelerator: str = "gpu"
+    model_dtype: str = None  # if empty, we use the default
+    vendor: str = "aws"
+    region: str = "us-east-1"  # this region has the most hardware options available
+    instance_size: str = None  # if none, we autoscale
+    instance_type: str = None  # if none, we autoscale
+    framework: str = "pytorch"
+    endpoint_type: str = "protected"
+    add_special_tokens: bool = True
+    revision: str = "main"
+    namespace: str = None  # The namespace under which to launch the endpoint. Defaults to the current user's namespace
+    image_url: str = None
+    env_vars: dict = None
+
+    def __post_init__(self):
+        # xor operator, one is None but not the other
+        if (self.instance_size is None) ^ (self.instance_type is None):
+            raise ValueError(
+                "When creating an inference endpoint, you need to specify explicitly both instance_type and instance_size, or none of them for autoscaling."
+            )
+
+        if not (self.endpoint_name is None) ^ int(self.model_name is None):
+            raise ValueError("You need to set either endpoint_name or model_name (but not both).")
+
+    def get_dtype_args(self) -> Dict[str, str]:
+        if self.model_dtype is None:
+            return {}
+        model_dtype = self.model_dtype.lower()
+        if model_dtype in ["awq", "eetq", "gptq"]:
+            return {"QUANTIZE": model_dtype}
+        if model_dtype == "8bit":
+            return {"QUANTIZE": "bitsandbytes"}
+        if model_dtype == "4bit":
+            return {"QUANTIZE": "bitsandbytes-nf4"}
+        if model_dtype in ["bfloat16", "float16"]:
+            return {"DTYPE": model_dtype}
+        return {}
+
+    def get_custom_env_vars(self) -> Dict[str, str]:
+        return {k: str(v) for k, v in self.env_vars.items()} if self.env_vars else {}
+
+
 class InferenceEndpointModel(LightevalModel):
     """InferenceEndpointModels can be used both with the free inference client, or with inference
     endpoints, which will use text-generation-inference to deploy your model for the duration of the evaluation.
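A short behavior sketch for the relocated InferenceEndpointModelConfig, based only on the dataclass shown above; the model name is a placeholder.

```python
# Uses only what the dataclass above defines; "my-org/my-model" is a placeholder repo id.
from lighteval.models.endpoints.endpoint_model import InferenceEndpointModelConfig

config = InferenceEndpointModelConfig(
    model_name="my-org/my-model",  # endpoint_name must then stay unset (xor check in __post_init__)
    model_dtype="4bit",
)
print(config.get_dtype_args())       # {'QUANTIZE': 'bitsandbytes-nf4'}
print(config.get_custom_env_vars())  # {} since env_vars was not provided

# Setting only one of instance_size/instance_type raises ValueError in __post_init__,
# as does setting both endpoint_name and model_name, or neither.
```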
@@ -24,13 +24,14 @@
 import os
 import time
 from concurrent.futures import ThreadPoolExecutor
+from dataclasses import dataclass
 from typing import Optional

 from tqdm import tqdm

 from lighteval.data import GenerativeTaskDataset, LoglikelihoodDataset
 from lighteval.models.abstract_model import LightevalModel
-from lighteval.models.endpoint_model import ModelInfo
+from lighteval.models.endpoints.endpoint_model import ModelInfo
 from lighteval.models.model_output import (
     GenerativeResponse,
     LoglikelihoodResponse,
@@ -58,6 +59,11 @@
 logging.getLogger("httpx").setLevel(logging.ERROR)


+@dataclass
+class OpenAIModelConfig:
+    model: str
+
+
 class OpenAIClient(LightevalModel):
     _DEFAULT_MAX_LENGTH: int = 4096

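For completeness, a minimal sketch for the new OpenAIModelConfig; the module path matches the docs diff above, while the model id is a placeholder and how OpenAIClient consumes the config is not shown here.

```python
# Hedged sketch: only the one-field dataclass appears in the diff above.
from lighteval.models.endpoints.openai_model import OpenAIModelConfig

config = OpenAIModelConfig(model="gpt-4o-mini")  # placeholder model name
print(config.model)
```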
@@ -21,13 +21,14 @@
 # SOFTWARE.

 import asyncio
+from dataclasses import dataclass
 from typing import Coroutine, Optional

 import requests
 from huggingface_hub import TextGenerationInputGrammarType, TextGenerationOutput
 from transformers import AutoTokenizer

-from lighteval.models.endpoint_model import InferenceEndpointModel, ModelInfo
+from lighteval.models.endpoints.endpoint_model import InferenceEndpointModel, ModelInfo
 from lighteval.utils.imports import NO_TGI_ERROR_MSG, is_tgi_available


@@ -44,6 +45,13 @@ def divide_chunks(array, n):
         yield array[i : i + n]


+@dataclass
+class TGIModelConfig:
+    inference_server_address: str
+    inference_server_auth: str
+    model_id: str
+
+
 # inherit from InferenceEndpointModel instead of LightevalModel since they both use the same interface, and only overwrite
 # the client functions, since they use a different client.
 class ModelClient(InferenceEndpointModel):
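And a hedged sketch of filling in the new TGIModelConfig for a locally running text-generation-inference server; the field names come from the dataclass above, while the server address and model id are placeholders, and how ModelClient consumes these values is not shown in this diff.

```python
from lighteval.models.endpoints.tgi_model import TGIModelConfig  # module path as in the docs diff above

config = TGIModelConfig(
    inference_server_address="http://localhost:8080",  # placeholder local TGI endpoint
    inference_server_auth=None,                        # no auth token for a local server (assumption)
    model_id="my-org/my-model",                        # placeholder repo id
)
```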