From 93a056fe4249281107baafc2a936e34f7f59e111 Mon Sep 17 00:00:00 2001
From: Albert Villanova del Moral
 <8515462+albertvillanova@users.noreply.github.com>
Date: Thu, 12 Dec 2024 16:30:45 +0100
Subject: [PATCH 1/2] Fix imports from model_config (#443)

---
 docs/source/using-the-python-api.mdx | 2 +-
 src/lighteval/main_endpoint.py       | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/source/using-the-python-api.mdx b/docs/source/using-the-python-api.mdx
index 2e160a679..8c44050f4 100644
--- a/docs/source/using-the-python-api.mdx
+++ b/docs/source/using-the-python-api.mdx
@@ -11,7 +11,7 @@ After that, simply run the pipeline and save the results.
 ```python
 import lighteval
 from lighteval.logging.evaluation_tracker import EvaluationTracker
-from lighteval.models.model_config import VLLMModelConfig
+from lighteval.models.vllm.vllm_model import VLLMModelConfig
 from lighteval.pipeline import ParallelismManager, Pipeline, PipelineParameters
 from lighteval.utils.utils import EnvConfig
 from lighteval.utils.imports import is_accelerate_available
diff --git a/src/lighteval/main_endpoint.py b/src/lighteval/main_endpoint.py
index 952aae074..be75b711a 100644
--- a/src/lighteval/main_endpoint.py
+++ b/src/lighteval/main_endpoint.py
@@ -93,7 +93,7 @@ def openai(
     Evaluate OPENAI models.
     """
     from lighteval.logging.evaluation_tracker import EvaluationTracker
-    from lighteval.models.model_config import OpenAIModelConfig
+    from lighteval.models.endpoints.openai_model import OpenAIModelConfig
     from lighteval.pipeline import EnvConfig, ParallelismManager, Pipeline, PipelineParameters
 
     env_config = EnvConfig(token=TOKEN, cache_dir=cache_dir)
@@ -317,7 +317,7 @@ def tgi(
     import yaml
 
     from lighteval.logging.evaluation_tracker import EvaluationTracker
-    from lighteval.models.model_config import TGIModelConfig
+    from lighteval.models.endpoints.tgi_model import TGIModelConfig
     from lighteval.pipeline import EnvConfig, ParallelismManager, Pipeline, PipelineParameters
 
     env_config = EnvConfig(token=TOKEN, cache_dir=cache_dir)

From 500632a097499fc7a2002ad682d605cb03072302 Mon Sep 17 00:00:00 2001
From: Albert Villanova del Moral
 <8515462+albertvillanova@users.noreply.github.com>
Date: Mon, 16 Dec 2024 14:10:21 +0100
Subject: [PATCH 2/2] Fix wrong instructions and code for custom tasks (#450)

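The removed `if __name__ == "__main__":` blocks pass a generator expression
to print(), so they print the generator object's repr instead of the task
names, followed by len(TASKS_TABLE). They do not convert tasks to a dict for
lighteval, contrary to what the docs and comments claimed.

* Delete wrong instruction in custom task docs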

* Delete wrong code for custom tasks

* Delete wrong code for extended tasks

* Delete wrong code for community tasks

* Delete unnecessary code for community tasks
---
 community_tasks/_template.py                         |  7 -------
 community_tasks/aimo_evals.py                        |  8 --------
 community_tasks/arabic_evals.py                      |  4 ----
 community_tasks/german_rag_evals.py                  |  8 --------
 community_tasks/oz_evals.py                          |  5 -----
 community_tasks/serbian_eval.py                      |  4 ----
 docs/source/adding-a-custom-task.mdx                 | 11 -----------
 examples/nanotron/custom_evaluation_tasks.py         |  4 ----
 src/lighteval/tasks/extended/ifeval/main.py          |  5 -----
 src/lighteval/tasks/extended/mix_eval/main.py        |  5 -----
 src/lighteval/tasks/extended/mt_bench/main.py        |  4 ----
 src/lighteval/tasks/extended/tiny_benchmarks/main.py |  8 --------
 12 files changed, 73 deletions(-)

diff --git a/community_tasks/_template.py b/community_tasks/_template.py
index d0099ba26..2db28e340 100644
--- a/community_tasks/_template.py
+++ b/community_tasks/_template.py
@@ -116,10 +116,3 @@ def __init__(
     sample_level_fn=lambda x: x,  # how to compute score for one sample
     corpus_level_fn=np.mean,  # aggregation
 )
-
-# MODULE LOGIC
-# You should not need to touch this
-# Convert to dict for lighteval
-if __name__ == "__main__":
-    print(t.name for t in TASKS_TABLE)
-    print(len(TASKS_TABLE))
diff --git a/community_tasks/aimo_evals.py b/community_tasks/aimo_evals.py
index be59950bd..885ffd8da 100644
--- a/community_tasks/aimo_evals.py
+++ b/community_tasks/aimo_evals.py
@@ -56,11 +56,3 @@ def aimo_prompt(line, task_name: str = None):
 
 # STORE YOUR EVALS
 TASKS_TABLE = [task]
-
-
-# MODULE LOGIC
-# You should not need to touch this
-
-if __name__ == "__main__":
-    print(t.name for t in TASKS_TABLE)
-    print(len(TASKS_TABLE))
diff --git a/community_tasks/arabic_evals.py b/community_tasks/arabic_evals.py
index 07a096eca..4408f22fa 100644
--- a/community_tasks/arabic_evals.py
+++ b/community_tasks/arabic_evals.py
@@ -856,7 +856,3 @@ def __init__(
     + [toxigen_ar_task]
     + [sciq_ar_task]
 )
-
-if __name__ == "__main__":
-    print(t.name for t in TASKS_TABLE)
-    print(len(TASKS_TABLE))
diff --git a/community_tasks/german_rag_evals.py b/community_tasks/german_rag_evals.py
index 30d6dcb4a..78af6794e 100644
--- a/community_tasks/german_rag_evals.py
+++ b/community_tasks/german_rag_evals.py
@@ -221,11 +221,3 @@ def prompt_fn_context_question_match(line, task_name: str = None):
 
 # STORE YOUR EVALS
 TASKS_TABLE = [task1, task2, task3, task4]
-
-
-# MODULE LOGIC
-# You should not need to touch this
-
-if __name__ == "__main__":
-    print(t.name for t in TASKS_TABLE)
-    print(len(TASKS_TABLE))
diff --git a/community_tasks/oz_evals.py b/community_tasks/oz_evals.py
index 6252a20a0..4ec70e291 100644
--- a/community_tasks/oz_evals.py
+++ b/community_tasks/oz_evals.py
@@ -87,8 +87,3 @@ def prompt_fn_oz_eval_task(line, task_name: str = None):
 
 # STORE YOUR EVALS
 TASKS_TABLE = [oz_eval_task]
-
-
-if __name__ == "__main__":
-    print(t["name"] for t in TASKS_TABLE)
-    print(len(TASKS_TABLE))
diff --git a/community_tasks/serbian_eval.py b/community_tasks/serbian_eval.py
index 3b49c4cb0..d972ac69c 100644
--- a/community_tasks/serbian_eval.py
+++ b/community_tasks/serbian_eval.py
@@ -784,7 +784,3 @@ def create_task_config(
     mmlu_world_religions,
     mmlu_all,
 ]
-
-if __name__ == "__main__":
-    print(t.name for t in TASKS_TABLE)
-    print(len(TASKS_TABLE))
diff --git a/docs/source/adding-a-custom-task.mdx b/docs/source/adding-a-custom-task.mdx
index e1823b7b9..e5160024d 100644
--- a/docs/source/adding-a-custom-task.mdx
+++ b/docs/source/adding-a-custom-task.mdx
@@ -167,17 +167,6 @@ TASKS_TABLE = SUBSET_TASKS
 # TASKS_TABLE = [task]
 ```
 
-Finally, you need to add a module logic to convert your task to a dict for lighteval.
-
-```python
-# MODULE LOGIC
-# You should not need to touch this
-# Convert to dict for lighteval
-if __name__ == "__main__":
-    print(t.name for t in TASKS_TABLE)
-    print(len(TASKS_TABLE))
-```
-
 Once your file is created you can then run the evaluation with the following command:
 
 ```bash
diff --git a/examples/nanotron/custom_evaluation_tasks.py b/examples/nanotron/custom_evaluation_tasks.py
index 78c354916..e4b1b5ca6 100644
--- a/examples/nanotron/custom_evaluation_tasks.py
+++ b/examples/nanotron/custom_evaluation_tasks.py
@@ -671,7 +671,3 @@ def __init__(
     "all": ",".join(t[1] for t in _TASKS_STRINGS),
     "early-signal": EARLY_SIGNAL_TASKS,
 }
-
-if __name__ == "__main__":
-    print(t["name"] for t in TASKS_TABLE)
-    print(len(TASKS_TABLE))
diff --git a/src/lighteval/tasks/extended/ifeval/main.py b/src/lighteval/tasks/extended/ifeval/main.py
index e6947bb6e..60d1be5fa 100644
--- a/src/lighteval/tasks/extended/ifeval/main.py
+++ b/src/lighteval/tasks/extended/ifeval/main.py
@@ -160,8 +160,3 @@ def agg_inst_level_acc(items):
 TASKS_TABLE = [ifeval]
 
 extend_enum(Metrics, "ifeval_metric", ifeval_metrics)
-
-if __name__ == "__main__":
-    # Adds the metric to the metric list!
-    print(t["name"] for t in TASKS_TABLE)
-    print(len(TASKS_TABLE))
diff --git a/src/lighteval/tasks/extended/mix_eval/main.py b/src/lighteval/tasks/extended/mix_eval/main.py
index 15d7490bc..8684e910c 100644
--- a/src/lighteval/tasks/extended/mix_eval/main.py
+++ b/src/lighteval/tasks/extended/mix_eval/main.py
@@ -228,8 +228,3 @@ def mean_dv_5(x):
 
 
 TASKS_TABLE = [mixeval_multichoice_easy, mixeval_freeform_easy, mixeval_multichoice_hard, mixeval_freeform_hard]
-
-if __name__ == "__main__":
-    # Adds the metric to the metric list!
-    print(t["name"] for t in TASKS_TABLE)
-    print(len(TASKS_TABLE))
diff --git a/src/lighteval/tasks/extended/mt_bench/main.py b/src/lighteval/tasks/extended/mt_bench/main.py
index e5b209982..117e363dd 100644
--- a/src/lighteval/tasks/extended/mt_bench/main.py
+++ b/src/lighteval/tasks/extended/mt_bench/main.py
@@ -95,7 +95,3 @@ def flow_judge_mt_bench_prompt(question, answer, options, gold):
 
 
 TASKS_TABLE = [task]
-
-if __name__ == "__main__":
-    print(t["name"] for t in TASKS_TABLE)
-    print(len(TASKS_TABLE))
diff --git a/src/lighteval/tasks/extended/tiny_benchmarks/main.py b/src/lighteval/tasks/extended/tiny_benchmarks/main.py
index b283921f2..fae6e89df 100644
--- a/src/lighteval/tasks/extended/tiny_benchmarks/main.py
+++ b/src/lighteval/tasks/extended/tiny_benchmarks/main.py
@@ -283,11 +283,3 @@ def aggregate(self, y_input):
             corpus_level_fn=TinyCorpusAggregator(name).aggregate,
         ),
     )
-
-
-# MODULE LOGIC
-# You should not need to touch this
-# Convert to dict for lighteval
-if __name__ == "__main__":
-    print(t["name"] for t in TASKS_TABLE)
-    print(len(TASKS_TABLE))