feat: enable evaluating from json
mariagrandury committed Oct 23, 2024
1 parent a22bd58 commit f51f960
Showing 6 changed files with 171 additions and 1 deletion.
3 changes: 3 additions & 0 deletions README.md
@@ -1 +1,4 @@
# Backend of "La Leaderboard"

- To evaluate the models in the requests dataset, run `python3 -m main_eval_queue`
- To evaluate the model/task pairs listed in `internal_queue/tasks_todo.json`, run `python3 -m main_eval_json` (see the sketch below)
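For context, `main_eval_json.py` (added further down in this commit) reads `internal_queue/tasks_todo.json` and `internal_queue/model_precision.json`. The sketch below shows one way to queue extra work for a model before running the script; the model ID, task names, and the snippet itself are illustrative and not part of this commit:

```python
import json

TASKS_TODO = "internal_queue/tasks_todo.json"

# Load the existing model -> pending-tasks mapping.
with open(TASKS_TODO, "r") as f:
    tasks_todo = json.load(f)

# Queue two tasks for a model (placeholder model ID and task names).
tasks_todo.setdefault("my-org/my-model", []).extend(["escola", "xnli_ca"])

# Write the mapping back, keeping the file human-readable.
with open(TASKS_TODO, "w") as f:
    json.dump(tasks_todo, f, indent=4, ensure_ascii=False)
```

Any model added this way also needs a matching entry in `internal_queue/model_precision.json`, because `main_eval_json.py` looks up a precision for every queued model.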
22 changes: 22 additions & 0 deletions internal_queue/model_precision.json
@@ -0,0 +1,22 @@
{
    "flax-community/gpt-2-spanish": "float32",
    "google/gemma-2-9b": "float32",
    "google/gemma-2-2b": "float32",
    "bertin-project/bertin-gpt-j-6B": "float32",
    "gplsi/Aitana-6.3B": "bfloat16",
    "projecte-aina/aguila-7b": "float16",
    "01-ai/Yi-1.5-9B": "bfloat16",
    "microsoft/phi-1_5": "float16",
    "occiglot/occiglot-7b-es-en": "float32",
    "tiiuae/falcon-7b": "bfloat16",
    "HiTZ/latxa-7b-v1.2": "bfloat16",
    "meta-llama/Meta-Llama-3.1-8B": "bfloat16",
    "mistralai/Mistral-7B-v0.3": "bfloat16",
    "projecte-aina/FLOR-6.3B": "float16",
    "proxectonos/Carballo-bloom-1.3B": "float16",
    "BSC-LT/salamandra-2b": "bfloat16",
    "BSC-LT/salamandra-7b": "bfloat16",
    "meta-llama/Llama-3.2-1B": "bfloat16",
    "meta-llama/Llama-3.2-3B": "bfloat16",
    "bertin-project/Gromenauer-7B": "float32"
}
98 changes: 98 additions & 0 deletions internal_queue/tasks_todo.json
@@ -0,0 +1,98 @@
{
    "google/gemma-2-2b": [
        "escola",
        "catcola",
        "parafraseja",
        "paws_ca",
        "xnli_ca"
    ],
    "HiTZ/latxa-7b-v1.2": [],
    "proxectonos/Carballo-bloom-1.3B": [
        "mgsm_direct_es",
        "mgsm_direct_ca",
        "mgsm_direct_gl"
    ],
    "projecte-aina/aguila-7b": [
        "escola",
        "mgsm_direct_gl"
    ],
    "projecte-aina/FLOR-6.3B": [],
    "gplsi/Aitana-6.3B": [
        "escola",
        "catcola",
        "paws_ca",
        "xnli_ca"
    ],
    "occiglot/occiglot-7b-es-en": [
        "eus_reading",
        "summarization_gl"
    ],
    "bertin-project/bertin-gpt-j-6B": [
        "escola",
        "mgsm_direct_es",
        "catcola",
        "mgsm_direct_ca",
        "paws_ca",
        "xnli_ca",
        "mgsm_direct_gl"
    ],
    "meta-llama/Meta-Llama-3.1-8B": [],
    "mistralai/Mistral-7B-v0.3": [],
    "01-ai/Yi-1.5-9B": [
        "escola",
        "mgsm_direct_es",
        "catcola",
        "paws_ca",
        "xnli_ca"
    ],
    "microsoft/phi-1_5": [
        "escola",
        "catcola",
        "mgsm_direct_ca",
        "paws_ca",
        "xnli_ca",
        "mgsm_direct_gl"
    ],
    "tiiuae/falcon-7b": [
        "escola",
        "catcola",
        "paws_ca",
        "xnli_ca"
    ],
    "bertin-project/Gromenauer-7B": [
        "escola",
        "mgsm_direct_es",
        "catcola",
        "paws_ca",
        "xnli_ca"
    ],
    "BSC-LT/salamandra-2b": [
        "escola",
        "mgsm_direct_es",
        "catcola",
        "paws_ca",
        "xnli_ca"
    ],
    "BSC-LT/salamandra-7b": [
        "escola",
        "catcola",
        "paws_ca",
        "xnli_ca"
    ],
    "meta-llama/Llama-3.2-1B": [
        "escola",
        "wnli_es",
        "catcola",
        "paws_ca",
        "wnli_ca",
        "xnli_ca"
    ],
    "meta-llama/Llama-3.2-3B": [
        "escola",
        "wnli_es",
        "catcola",
        "paws_ca",
        "wnli_ca",
        "xnli_ca"
    ]
}
45 changes: 45 additions & 0 deletions main_eval_json.py
@@ -0,0 +1,45 @@
import json

from src.backend.manage_requests import EvalRequest
from src.backend.run_eval_suite_harness import run_evaluation
from src.envs import (
    BATCH_SIZE,
    DEVICE,
    EVAL_RESULTS_PATH_BACKEND,
    LEADERBOARD_GROUP,
    LIMIT,
    LOGS_REPO,
    NUM_FEWSHOT,
    RESULTS_REPO,
)

if __name__ == "__main__":
    with open("internal_queue/tasks_todo.json", "r") as f:
        tasks_todo = json.load(f)
    with open("internal_queue/model_precision.json", "r") as f:
        model_precision = json.load(f)

    for model in tasks_todo:
        MODEL = model
        TASKS_HARNESS = tasks_todo[model]
        PRECISION = model_precision[model]
        EVAL_REQUEST = EvalRequest(
            model=MODEL,
            precision=PRECISION,
            base_model="",  # TODO: Review arg
            status="",  # TODO: Review arg
            json_filepath="",  # TODO: Review arg
        )

        run_evaluation(
            eval_request=EVAL_REQUEST,
            task_names=TASKS_HARNESS,
            leaderboard_group=LEADERBOARD_GROUP,
            num_fewshot=NUM_FEWSHOT,
            batch_size=BATCH_SIZE,
            device=DEVICE,
            local_dir=EVAL_RESULTS_PATH_BACKEND,
            results_repo=RESULTS_REPO,
            logs_repo=LOGS_REPO,
            limit=LIMIT,
        )
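`main_eval_json.py` raises a `KeyError` mid-run if a model listed in `tasks_todo.json` has no entry in `model_precision.json`. Below is a minimal pre-flight check, assuming the file layout added in this commit; the check is a sketch, not part of the commit:

```python
import json

with open("internal_queue/tasks_todo.json", "r") as f:
    tasks_todo = json.load(f)
with open("internal_queue/model_precision.json", "r") as f:
    model_precision = json.load(f)

# Every queued model needs a known precision before the loop starts.
missing = [m for m in tasks_todo if m not in model_precision]
if missing:
    raise SystemExit(f"Models without a precision entry: {missing}")

# Empty task lists carry no pending work; list them so they can be pruned.
empty = [m for m, tasks in tasks_todo.items() if not tasks]
if empty:
    print(f"Models with no pending tasks: {empty}")
```

Running this before `python3 -m main_eval_json` surfaces a missing precision entry up front instead of halfway through the queue.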
3 changes: 2 additions & 1 deletion main.py → main_eval_queue.py
@@ -8,6 +8,7 @@
 from src.backend.sort_queue import sort_models_by_priority
 from src.envs import (
     API,
+    BATCH_SIZE,
     DEVICE,
     EVAL_REQUESTS_PATH_BACKEND,
     EVAL_RESULTS_PATH_BACKEND,
@@ -97,7 +98,7 @@ def run_auto_eval():
         local_dir=EVAL_RESULTS_PATH_BACKEND,
         results_repo=RESULTS_REPO,
         logs_repo=LOGS_REPO,
-        batch_size=1,
+        batch_size=BATCH_SIZE,
         device=DEVICE,
         leaderboard_group=LEADERBOARD_GROUP,
         limit=LIMIT,
1 change: 1 addition & 0 deletions src/envs.py
@@ -17,6 +17,7 @@
 NUM_FEWSHOT = 5 # TODO: Remove to use each task's default number of few-shots
 LEADERBOARD_GROUP = None # TODO: Update leaderboard group name
 PARALLELIZE = True
+BATCH_SIZE = 1

 # Cache setup
 CACHE_PATH = os.getenv("HF_HOME", ".") # /data/.huggingface
