Skip to content

Commit

Permalink
Remove insights, have single asyncio run, refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
alekszievr committed Jan 16, 2025
1 parent b32025b commit 33f09f2
Show file tree
Hide file tree
Showing 3 changed files with 104 additions and 40 deletions.
18 changes: 18 additions & 0 deletions evals/qa_eval_parameters.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{
"dataset": [
"hotpotqa"
],
"rag_option": [
"no_rag",
"cognee",
"simple_rag",
"brute_force"
],
"num_samples": [
2
],
"metric_names": [
"Correctness",
"Comprehensiveness"
]
}
60 changes: 60 additions & 0 deletions evals/qa_eval_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
import itertools
import matplotlib.pyplot as plt
from jsonschema import ValidationError, validate
import pandas as pd
from pathlib import Path

def _array_of(item_schema):
    """Return a JSON Schema fragment for an array whose items match *item_schema*."""
    return {"type": "array", "items": item_schema}


# JSON Schema that a parameter set must satisfy: four required list-valued
# keys and no keys beyond them.
paramset_json_schema = {
    "type": "object",
    "properties": {
        "dataset": _array_of({"type": "string"}),
        "rag_option": _array_of({"type": "string"}),
        # Sample counts must be integers of at least 1.
        "num_samples": _array_of({"type": "integer", "minimum": 1}),
        "metric_names": _array_of({"type": "string"}),
    },
    "required": ["dataset", "rag_option", "num_samples", "metric_names"],
    "additionalProperties": False,
}


def save_table_as_image(df, image_path):
    """Render *df* as a matplotlib table and write it to *image_path*.

    The table uses the frame's values as cells, its columns as column labels
    and its index as row labels; the index name is used as the figure title.

    Args:
        df: DataFrame to render.
        image_path: Destination passed to ``plt.savefig``.
    """
    fig = plt.figure(figsize=(10, 6))
    try:
        plt.axis("tight")
        plt.axis("off")
        plt.table(cellText=df.values, colLabels=df.columns, rowLabels=df.index, loc="center")
        plt.title(f"{df.index.name}")
        plt.savefig(image_path, bbox_inches="tight")
    finally:
        # Close this specific figure even when rendering or saving fails, so
        # figures do not accumulate across repeated calls.
        plt.close(fig)


def save_results_as_image(results, out_path):
    """Save one table image per (dataset, num_samples) pair found in *results*.

    Args:
        results: Nested mapping ``{dataset: {num_samples: table_data}}`` where
            ``table_data`` is turned into a DataFrame with its keys as rows.
        out_path: Directory in which the ``table_<dataset>_<num_samples>.png``
            files are written.
    """
    for dataset_name, per_sample_count in results.items():
        for sample_count, rows in per_sample_count.items():
            table = pd.DataFrame.from_dict(rows, orient="index")
            # The index name doubles as the title of the rendered image.
            table.index.name = f"Dataset: {dataset_name}, Num Samples: {sample_count}"
            target = Path(out_path).joinpath(f"table_{dataset_name}_{sample_count}.png")
            save_table_as_image(table, target)


def get_combinations(parameters):
    """Validate a parameter set and return the cross product of its run parameters.

    Args:
        parameters: Mapping that must satisfy ``paramset_json_schema``.

    Returns:
        A list of dicts, one per combination, each mapping a parameter name
        to a single value. ``"metric_names"`` is not part of the combinations.

    Raises:
        ValidationError: If *parameters* does not match the schema.
    """
    try:
        validate(instance=parameters, schema=paramset_json_schema)
    except ValidationError as e:
        # Chain the original error so the failing schema path is not lost.
        raise ValidationError(f"Invalid parameter set: {e.message}") from e

    # The schema key is "metric_names" (plural). Leaving it in the product
    # would repeat every combination once per metric name.
    params_for_combos = {k: v for k, v in parameters.items() if k != "metric_names"}
    keys = list(params_for_combos)
    return [
        dict(zip(keys, combo))
        for combo in itertools.product(*params_for_combos.values())
    ]
66 changes: 26 additions & 40 deletions evals/run_qa_eval.py
Original file line number Diff line number Diff line change
@@ -1,36 +1,14 @@
import asyncio
import itertools
from evals.eval_on_hotpot import eval_on_QA_dataset
from evals.qa_eval_utils import get_combinations, save_results_as_image
import argparse
from pathlib import Path
import json
import pandas as pd
import matplotlib.pyplot as plt


def save_table_as_image(df, image_path):
plt.figure(figsize=(10, 6))
plt.axis("tight")
plt.axis("off")
plt.table(cellText=df.values, colLabels=df.columns, rowLabels=df.index, loc="center")
plt.title(f"{df.index.name}")
plt.savefig(image_path, bbox_inches="tight")
plt.close()


parameters = {
"dataset": ["hotpotqa"], # "2wikimultihop"],
"rag_option": ["no_rag", "cognee", "simple_rag", "brute_force"],
"num_samples": [2],
"metric_names": ["Correctness", "Comprehensiveness"],
}

# Generate the cross product of all parameter values
params_for_combos = {k: v for k, v in parameters.items() if k != "metric_name"}
keys, values = zip(*params_for_combos.items())
combinations = [dict(zip(keys, combo)) for combo in itertools.product(*values)]


# Main async function to run all combinations concurrently
async def main():
async def run_evals_on_paramset(paramset: dict, out_path: str):
combinations = get_combinations(paramset)
json_path = Path(out_path) / Path("results.json")
results = {}
for params in combinations:
dataset = params["dataset"]
Expand All @@ -41,30 +19,38 @@ async def main():
dataset,
rag_option,
num_samples,
parameters["metric_names"],
paramset["metric_names"],
)

# Initialize nested structure if needed
if dataset not in results:
results[dataset] = {}
if num_samples not in results[dataset]:
results[dataset][num_samples] = {}

# Update the nested dictionary
results[dataset][num_samples][rag_option] = result

# Save results as JSON
json_path = "results.json"
with open(json_path, "w") as file:
json.dump(results, file, indent=1)

# Convert to tables and save images
for dataset, num_samples_data in results.items():
for num_samples, table_data in num_samples_data.items():
df = pd.DataFrame.from_dict(table_data, orient="index")
df.index.name = f"Dataset: {dataset}, Num Samples: {num_samples}"
image_path = f"table_{dataset}_{num_samples}.png"
save_table_as_image(df, image_path)
save_results_as_image(results, out_path)

return results


async def main():
    """Parse CLI arguments, load the parameter set and run the evaluations.

    ``--params_file`` is a JSON file holding the parameter set; ``--out_dir``
    is the directory the results are written to.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "--params_file",
        type=str,
        required=True,
        help="Path to a JSON file with the evaluation parameter set",
    )
    # Default to the current directory: without a default, an omitted
    # --out_dir would reach Path(out_path) as None and raise TypeError.
    parser.add_argument("--out_dir", type=str, default=".", help="Dir to save eval results")

    args = parser.parse_args()

    with open(args.params_file, "r") as file:
        parameters = json.load(file)

    await run_evals_on_paramset(parameters, args.out_dir)


if __name__ == "__main__":
Expand Down

0 comments on commit 33f09f2

Please sign in to comment.