adding serverless endpoints back
clefourrier committed Dec 12, 2024
1 parent f62cc89 commit 858d3d1
Showing 3 changed files with 20 additions and 14 deletions.
15 changes: 8 additions & 7 deletions src/lighteval/main_endpoint.py
@@ -33,10 +33,10 @@
TOKEN = os.getenv("HF_TOKEN")
CACHE_DIR: str = os.getenv("HF_HOME", "/scratch")

HELP_PANNEL_NAME_1 = "Common Paramaters"
HELP_PANNEL_NAME_1 = "Common Parameters"
HELP_PANNEL_NAME_2 = "Logging Parameters"
HELP_PANNEL_NAME_3 = "Debug Paramaters"
HELP_PANNEL_NAME_4 = "Modeling Paramaters"
HELP_PANNEL_NAME_3 = "Debug Parameters"
HELP_PANNEL_NAME_4 = "Modeling Parameters"


@app.command(rich_help_panel="Evaluation Backends")
@@ -93,7 +93,7 @@ def openai(
Evaluate OPENAI models.
"""
from lighteval.logging.evaluation_tracker import EvaluationTracker
-from lighteval.models.model_config import OpenAIModelConfig
+from lighteval.models.endpoints.openai_model import OpenAIModelConfig
from lighteval.pipeline import EnvConfig, ParallelismManager, Pipeline, PipelineParameters

env_config = EnvConfig(token=TOKEN, cache_dir=cache_dir)
@@ -147,9 +147,10 @@ def inference_endpoint(
],
tasks: Annotated[str, Argument(help="Comma-separated list of tasks to evaluate on.")],
free_endpoint: Annotated[
-str,
-Argument(
-help="True if you want to use the serverless free endpoints, False (default) if you want to spin up your own inference endpoint."
+bool,
+Option(
+help="Use serverless free endpoints instead of spinning up your own inference endpoint.",
+rich_help_panel=HELP_PANNEL_NAME_4,
),
] = False,
# === Common parameters ===
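Note on the free_endpoint change above: switching from Argument(str) to Option(bool) turns the parameter into an on/off CLI flag rather than a required positional string. A minimal standalone Typer sketch of that pattern (not lighteval's actual CLI wiring; the command name and flag naming below are illustrative):

# sketch.py -- illustrative only, not part of the diff
import typer
from typing import Annotated

app = typer.Typer()

@app.command()
def demo(
    free_endpoint: Annotated[
        bool,
        typer.Option(help="Use serverless free endpoints instead of spinning up your own inference endpoint."),
    ] = False,
):
    # A bool Option defaults to False and is enabled with `--free-endpoint`.
    print(f"free_endpoint={free_endpoint}")

if __name__ == "__main__":
    app()  # e.g. `python sketch.py --free-endpoint`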
19 changes: 12 additions & 7 deletions src/lighteval/models/endpoints/endpoint_model.py
@@ -76,11 +76,11 @@

@dataclass
class ServerlessEndpointModelConfig:
-model: str
+model_name: str
add_special_tokens: bool = True

@classmethod
def from_path(cls, path: str) -> "InferenceEndpointModelConfig":
def from_path(cls, path: str) -> "ServerlessEndpointModelConfig":
import yaml

with open(path, "r") as f:
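Because the field is renamed from model to model_name, any code that builds a ServerlessEndpointModelConfig directly has to follow the rename. A minimal sketch of direct construction, assuming lighteval is installed (the model id below is only a placeholder):

# sketch -- illustrative only, not part of the diff
from lighteval.models.endpoints.endpoint_model import ServerlessEndpointModelConfig

config = ServerlessEndpointModelConfig(
    model_name="my-org/my-model",  # placeholder model id
    add_special_tokens=True,       # default shown explicitly
)
print(config.model_name)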
@@ -282,10 +282,10 @@ def __init__( # noqa: C901
else: # Free inference client
self.endpoint = None
self.endpoint_name = None
-self.name = config.model
+self.name = config.model_name
self.revision = "default"
-self.async_client = AsyncInferenceClient(model=config.model, token=env_config.token)
-self.client = InferenceClient(model=config.model, token=env_config.token)
+self.async_client = AsyncInferenceClient(model=config.model_name, token=env_config.token)
+self.client = InferenceClient(model=config.model_name, token=env_config.token)

self.use_async = True # set to False for debug - async use is faster

@@ -295,7 +295,7 @@ def __init__( # noqa: C901
self.model_info = ModelInfo(
model_name=self.name,
model_sha=self.revision,
-model_dtype=config.model_dtype or "default",
+model_dtype=getattr(config, "model_dtype", "default"),
model_size=-1,
)
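The getattr change above matters because ServerlessEndpointModelConfig (unlike the dedicated endpoint config) does not define model_dtype, so plain attribute access would raise AttributeError on the serverless path. A standalone sketch of the fallback behaviour, using illustrative stand-in dataclasses rather than lighteval's own:

# sketch -- illustrative only, not part of the diff
from dataclasses import dataclass

@dataclass
class EndpointLikeConfig:      # stand-in for a config that defines model_dtype
    model_dtype: str = "float16"

@dataclass
class ServerlessLikeConfig:    # stand-in for a config without model_dtype
    model_name: str = "my-org/my-model"

print(getattr(EndpointLikeConfig(), "model_dtype", "default"))    # -> float16
print(getattr(ServerlessLikeConfig(), "model_dtype", "default"))  # -> default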

@@ -547,7 +547,12 @@ def loglikelihood(
cont_toks = torch.tensor(cur_request.tokenized_continuation)
len_choice = len(cont_toks)

-logits = [t.logprob for t in response.details.prefill[-len_choice:] if t.logprob is not None]
+if self.endpoint: # inference endpoint
+    logits = [
+        t.logprob for t in response.details.prefill[-len_choice:] if t.logprob is not None
+    ] # to check
+else: # serverless endpoint
+    logits = [t.logprob for t in response.details.tokens[-len_choice:] if t.logprob is not None]

greedy_tokens = torch.tensor(logits).argmax(dim=-1)
max_equal = (greedy_tokens == cont_toks).all().squeeze(0)
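For reference, the branch added above takes the prompt log-probabilities from details.prefill when a dedicated inference endpoint is running and from details.tokens on the serverless path. A small mock-based sketch of that selection logic (the response objects are faked with SimpleNamespace; only the field names come from the diff, the values are made up):

# sketch -- illustrative only, not part of the diff
from types import SimpleNamespace

def continuation_logprobs(response, len_choice, use_endpoint):
    # Mirror the branch above: prefill for a dedicated endpoint, tokens for serverless.
    source = response.details.prefill if use_endpoint else response.details.tokens
    return [t.logprob for t in source[-len_choice:] if t.logprob is not None]

def tok(logprob):
    return SimpleNamespace(logprob=logprob)

response = SimpleNamespace(
    details=SimpleNamespace(prefill=[tok(-0.1), tok(-2.3)], tokens=[tok(-0.5), tok(-1.2)]),
)
print(continuation_logprobs(response, len_choice=2, use_endpoint=False))  # -> [-0.5, -1.2]
print(continuation_logprobs(response, len_choice=2, use_endpoint=True))   # -> [-0.1, -2.3]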
