Merge branch 'main' into clem_homogeneize_generation_params
clefourrier authored Dec 16, 2024
2 parents be99c5e + 500632a commit e8b9057
Showing 13 changed files with 1 addition and 74 deletions.
7 changes: 0 additions & 7 deletions community_tasks/_template.py
@@ -116,10 +116,3 @@ def __init__(
sample_level_fn=lambda x: x, # how to compute score for one sample
corpus_level_fn=np.mean, # aggregation
)

# MODULE LOGIC
# You should not need to touch this
# Convert to dict for lighteval
if __name__ == "__main__":
print(t.name for t in TASKS_TABLE)
print(len(TASKS_TABLE))
8 changes: 0 additions & 8 deletions community_tasks/aimo_evals.py
@@ -56,11 +56,3 @@ def aimo_prompt(line, task_name: str = None):

# STORE YOUR EVALS
TASKS_TABLE = [task]


# MODULE LOGIC
# You should not need to touch this

if __name__ == "__main__":
print(t.name for t in TASKS_TABLE)
print(len(TASKS_TABLE))
4 changes: 0 additions & 4 deletions community_tasks/arabic_evals.py
@@ -856,7 +856,3 @@ def __init__(
+ [toxigen_ar_task]
+ [sciq_ar_task]
)

if __name__ == "__main__":
print(t.name for t in TASKS_TABLE)
print(len(TASKS_TABLE))
8 changes: 0 additions & 8 deletions community_tasks/german_rag_evals.py
@@ -221,11 +221,3 @@ def prompt_fn_context_question_match(line, task_name: str = None):

# STORE YOUR EVALS
TASKS_TABLE = [task1, task2, task3, task4]


# MODULE LOGIC
# You should not need to touch this

if __name__ == "__main__":
print(t.name for t in TASKS_TABLE)
print(len(TASKS_TABLE))
5 changes: 0 additions & 5 deletions community_tasks/oz_evals.py
@@ -87,8 +87,3 @@ def prompt_fn_oz_eval_task(line, task_name: str = None):

# STORE YOUR EVALS
TASKS_TABLE = [oz_eval_task]


if __name__ == "__main__":
print(t["name"] for t in TASKS_TABLE)
print(len(TASKS_TABLE))
4 changes: 0 additions & 4 deletions community_tasks/serbian_eval.py
@@ -784,7 +784,3 @@ def create_task_config(
mmlu_world_religions,
mmlu_all,
]

if __name__ == "__main__":
print(t.name for t in TASKS_TABLE)
print(len(TASKS_TABLE))
11 changes: 0 additions & 11 deletions docs/source/adding-a-custom-task.mdx
@@ -167,17 +167,6 @@ TASKS_TABLE = SUBSET_TASKS
# TASKS_TABLE = [task]
```

Finally, you need to add a module logic to convert your task to a dict for lighteval.

```python
# MODULE LOGIC
# You should not need to touch this
# Convert to dict for lighteval
if __name__ == "__main__":
print(t.name for t in TASKS_TABLE)
print(len(TASKS_TABLE))
```

Once your file is created you can then run the evaluation with the following command:

```bash
2 changes: 1 addition & 1 deletion docs/source/using-the-python-api.mdx
@@ -11,7 +11,7 @@ After that, simply run the pipeline and save the results.
```python
import lighteval
from lighteval.logging.evaluation_tracker import EvaluationTracker
- from lighteval.models.model_config import VLLMModelConfig
+ from lighteval.models.vllm.vllm_model import VLLMModelConfig
from lighteval.pipeline import ParallelismManager, Pipeline, PipelineParameters
from lighteval.utils.utils import EnvConfig
from lighteval.utils.imports import is_accelerate_available
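For context on the changed line above: `VLLMModelConfig` now lives in `lighteval.models.vllm.vllm_model` instead of `lighteval.models.model_config`. Below is a minimal sketch of how the updated import fits into the pipeline setup described on that docs page; the constructor arguments, model name, and task string are illustrative assumptions, not part of this commit.

```python
from lighteval.logging.evaluation_tracker import EvaluationTracker
from lighteval.models.vllm.vllm_model import VLLMModelConfig  # updated import path
from lighteval.pipeline import ParallelismManager, Pipeline, PipelineParameters

# Illustrative setup -- exact argument names and defaults may differ per lighteval version.
evaluation_tracker = EvaluationTracker(output_dir="./results")
pipeline_params = PipelineParameters(launcher_type=ParallelismManager.ACCELERATE)
model_config = VLLMModelConfig(pretrained="HuggingFaceH4/zephyr-7b-beta")

pipeline = Pipeline(
    tasks="leaderboard|truthfulqa:mc|0|0",  # example task string
    pipeline_parameters=pipeline_params,
    evaluation_tracker=evaluation_tracker,
    model_config=model_config,
)
pipeline.evaluate()
pipeline.save_and_push_results()
```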
4 changes: 0 additions & 4 deletions examples/nanotron/custom_evaluation_tasks.py
@@ -671,7 +671,3 @@ def __init__(
"all": ",".join(t[1] for t in _TASKS_STRINGS),
"early-signal": EARLY_SIGNAL_TASKS,
}

if __name__ == "__main__":
print(t["name"] for t in TASKS_TABLE)
print(len(TASKS_TABLE))
5 changes: 0 additions & 5 deletions src/lighteval/tasks/extended/ifeval/main.py
@@ -160,8 +160,3 @@ def agg_inst_level_acc(items):
TASKS_TABLE = [ifeval]

extend_enum(Metrics, "ifeval_metric", ifeval_metrics)

if __name__ == "__main__":
# Adds the metric to the metric list!
print(t["name"] for t in TASKS_TABLE)
print(len(TASKS_TABLE))
5 changes: 0 additions & 5 deletions src/lighteval/tasks/extended/mix_eval/main.py
@@ -228,8 +228,3 @@ def mean_dv_5(x):


TASKS_TABLE = [mixeval_multichoice_easy, mixeval_freeform_easy, mixeval_multichoice_hard, mixeval_freeform_hard]

if __name__ == "__main__":
# Adds the metric to the metric list!
print(t["name"] for t in TASKS_TABLE)
print(len(TASKS_TABLE))
4 changes: 0 additions & 4 deletions src/lighteval/tasks/extended/mt_bench/main.py
@@ -95,7 +95,3 @@ def flow_judge_mt_bench_prompt(question, answer, options, gold):


TASKS_TABLE = [task]

if __name__ == "__main__":
print(t["name"] for t in TASKS_TABLE)
print(len(TASKS_TABLE))
8 changes: 0 additions & 8 deletions src/lighteval/tasks/extended/tiny_benchmarks/main.py
@@ -283,11 +283,3 @@ def aggregate(self, y_input):
corpus_level_fn=TinyCorpusAggregator(name).aggregate,
),
)


# MODULE LOGIC
# You should not need to touch this
# Convert to dict for lighteval
if __name__ == "__main__":
print(t["name"] for t in TASKS_TABLE)
print(len(TASKS_TABLE))
