Remove insights, have single asyncio run, refactor

topoteretes · Jan 16, 2025 · 33f09f2 · 33f09f2
1 parent b32025b
commit 33f09f2
Show file tree

Hide file tree

Showing 3 changed files with 104 additions and 40 deletions.
diff --git a/evals/qa_eval_parameters.json b/evals/qa_eval_parameters.json
@@ -0,0 +1,18 @@
+{
+    "dataset": [
+        "hotpotqa"
+    ],
+    "rag_option": [
+        "no_rag",
+        "cognee",
+        "simple_rag",
+        "brute_force"
+    ],
+    "num_samples": [
+        2
+    ],
+    "metric_names": [
+        "Correctness",
+        "Comprehensiveness"
+    ]
+}
diff --git a/evals/qa_eval_utils.py b/evals/qa_eval_utils.py
@@ -0,0 +1,60 @@
+import itertools
+import matplotlib.pyplot as plt
+from jsonschema import ValidationError, validate
+import pandas as pd
+from pathlib import Path
+
+# JSON Schema for an evaluation parameter set: an object with exactly four
+# keys, each holding a list of candidate values. Unknown keys are rejected.
+paramset_json_schema = {
+    "type": "object",
+    "additionalProperties": False,
+    "required": ["dataset", "rag_option", "num_samples", "metric_names"],
+    "properties": {
+        "dataset": {"type": "array", "items": {"type": "string"}},
+        "rag_option": {"type": "array", "items": {"type": "string"}},
+        # Sample counts must be integers >= 1.
+        "num_samples": {"type": "array", "items": {"type": "integer", "minimum": 1}},
+        "metric_names": {"type": "array", "items": {"type": "string"}},
+    },
+}
+
+
+def save_table_as_image(df, image_path):
+    """Draw df as a centered table on a blank figure and save it to image_path.
+
+    The index name of df is used as the figure title.
+    """
+    plt.figure(figsize=(10, 6))
+    # Fit the axes limits to the content, then hide the axes themselves.
+    for axis_mode in ("tight", "off"):
+        plt.axis(axis_mode)
+    table_options = {
+        "cellText": df.values,
+        "colLabels": df.columns,
+        "rowLabels": df.index,
+        "loc": "center",
+    }
+    plt.table(**table_options)
+    plt.title(f"{df.index.name}")
+    plt.savefig(image_path, bbox_inches="tight")
+    plt.close()
+
+
+def save_results_as_image(results, out_path):
+    """Save one table image per (dataset, num_samples) pair found in results.
+
+    results is walked as a nested mapping dataset -> num_samples -> table data.
+    Each innermost mapping is loaded with its keys as rows and written to
+    table_<dataset>_<num_samples>.png inside out_path.
+    """
+    for dataset, per_sample_count in results.items():
+        for num_samples, table_data in per_sample_count.items():
+            frame = pd.DataFrame.from_dict(table_data, orient="index")
+            frame.index.name = f"Dataset: {dataset}, Num Samples: {num_samples}"
+            target = Path(out_path) / f"table_{dataset}_{num_samples}.png"
+            save_table_as_image(frame, target)
+
+
+def get_combinations(parameters):
+    """Validate a parameter set and expand it into individual run configs.
+
+    Returns a list of dicts, one per element of the cross product of every
+    parameter's values except "metric_names", which is left out of the
+    product.
+
+    Raises jsonschema.ValidationError if parameters does not match
+    paramset_json_schema.
+    """
+    try:
+        validate(instance=parameters, schema=paramset_json_schema)
+    except ValidationError as e:
+        # Keep the original error chained so the failing schema path is not lost.
+        raise ValidationError(f"Invalid parameter set: {e.message}") from e
+
+    # The key is "metric_names" (plural). Filtering on "metric_name" excluded
+    # nothing, so every config was repeated once per metric.
+    swept = {k: v for k, v in parameters.items() if k != "metric_names"}
+    keys = list(swept)
+    return [dict(zip(keys, combo)) for combo in itertools.product(*swept.values())]
diff --git a/evals/run_qa_eval.py b/evals/run_qa_eval.py
@@ -1,36 +1,14 @@
 import asyncio
-import itertools
 from evals.eval_on_hotpot import eval_on_QA_dataset
+from evals.qa_eval_utils import get_combinations, save_results_as_image
+import argparse
+from pathlib import Path
 import json
-import pandas as pd
-import matplotlib.pyplot as plt
 
 
-def save_table_as_image(df, image_path):
-    plt.figure(figsize=(10, 6))
-    plt.axis("tight")
-    plt.axis("off")
-    plt.table(cellText=df.values, colLabels=df.columns, rowLabels=df.index, loc="center")
-    plt.title(f"{df.index.name}")
-    plt.savefig(image_path, bbox_inches="tight")
-    plt.close()
-
-
-parameters = {
-    "dataset": ["hotpotqa"],  # "2wikimultihop"],
-    "rag_option": ["no_rag", "cognee", "simple_rag", "brute_force"],
-    "num_samples": [2],
-    "metric_names": ["Correctness", "Comprehensiveness"],
-}
-
-# Generate the cross product of all parameter values
-params_for_combos = {k: v for k, v in parameters.items() if k != "metric_name"}
-keys, values = zip(*params_for_combos.items())
-combinations = [dict(zip(keys, combo)) for combo in itertools.product(*values)]
-
-
-# Main async function to run all combinations concurrently
-async def main():
+async def run_evals_on_paramset(paramset: dict, out_path: str):
+    combinations = get_combinations(paramset)
+    json_path = Path(out_path) / Path("results.json")
     results = {}
     for params in combinations:
         dataset = params["dataset"]
@@ -41,30 +19,38 @@ async def main():
             dataset,
             rag_option,
             num_samples,
-            parameters["metric_names"],
+            paramset["metric_names"],
         )
 
-        # Initialize nested structure if needed
         if dataset not in results:
             results[dataset] = {}
         if num_samples not in results[dataset]:
             results[dataset][num_samples] = {}
 
-        # Update the nested dictionary
         results[dataset][num_samples][rag_option] = result
 
-        # Save results as JSON
-        json_path = "results.json"
         with open(json_path, "w") as file:
             json.dump(results, file, indent=1)
 
-    # Convert to tables and save images
-    for dataset, num_samples_data in results.items():
-        for num_samples, table_data in num_samples_data.items():
-            df = pd.DataFrame.from_dict(table_data, orient="index")
-            df.index.name = f"Dataset: {dataset}, Num Samples: {num_samples}"
-            image_path = f"table_{dataset}_{num_samples}.png"
-            save_table_as_image(df, image_path)
+        save_results_as_image(results, out_path)
+
+    return results
+
+
+async def main():
+    """Parse CLI arguments, load the parameter set and run the evaluations.
+
+    --params_file (required) is a JSON file holding the parameter set.
+    --out_dir is where results are written; it defaults to the current
+    directory and is created if missing.
+    """
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument(
+        "--params_file",
+        type=str,
+        required=True,
+        help="Path to a JSON file with the evaluation parameter set",
+    )
+    # Without a default, omitting --out_dir passed None on to Path() and
+    # raised a TypeError.
+    parser.add_argument("--out_dir", type=str, default=".", help="Dir to save eval results")
+
+    args = parser.parse_args()
+
+    with open(args.params_file, "r") as file:
+        parameters = json.load(file)
+
+    # Results are written into this directory, so make sure it exists first.
+    Path(args.out_dir).mkdir(parents=True, exist_ok=True)
+
+    await run_evals_on_paramset(parameters, args.out_dir)
 
 
 if __name__ == "__main__":