[ENH] make it easier to cache to a DB (tatsu-lab#73)
* [GITIGNORE] rm jsons

* load_dotenv

* load_dotenv

* [ENH] make evaluator easier to inherit

* nit

* changes from PR

* typo
YannDubs authored Jul 10, 2023
1 parent 50b77ab commit 0eb723b
Showing 5 changed files with 58 additions and 19 deletions.
.gitignore (3 additions, 0 deletions)
@@ -3,6 +3,9 @@ notebooks/
 example/
 .DS_Store
 .env
+src/**/*.json
+results
+./*.json
 
 # Byte-compiled / optimized / DLL files
 __pycache__/

setup.py (9 additions, 1 deletion)
@@ -11,7 +11,15 @@
 else:
     raise RuntimeError("Unable to find `__version__`.")
 
-PACKAGES_DEV = ["pre-commit>=3.2.0", "black>=23.1.0", "isort", "pytest", "pytest-mock", "pytest-skip-slow"]
+PACKAGES_DEV = [
+    "pre-commit>=3.2.0",
+    "black>=23.1.0",
+    "isort",
+    "pytest",
+    "pytest-mock",
+    "pytest-skip-slow",
+    "python-dotenv",
+]
 PACKAGES_ANALYSIS = ["seaborn", "matplotlib", "jupyterlab"]
 PACKAGES_LOCAL = ["accelerate", "transformers", "bitsandbytes", "xformers", "peft", "optimum", "scipy", "einops"]
 PACKAGES_ALL_API = ["anthropic>=0.3.0", "huggingface_hub", "cohere"]
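The only dependency change is python-dotenv joining the dev packages; assuming this setup.py maps PACKAGES_DEV to a "dev" extra as usual (the extras name is an assumption), it would be pulled in by something like pip install -e ".[dev]".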

src/alpaca_eval/__init__.py (7 additions, 1 deletion)
@@ -1 +1,7 @@
-__version__ = '0.2.0'
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from .main import *  # noqa
+
+__version__ = "0.2.0"

src/alpaca_eval/annotators/pairwise_evaluator.py (32 additions, 16 deletions)
@@ -2,7 +2,7 @@
 import os
 from functools import partial
 from pathlib import Path
-from typing import Any, Callable, Optional, Sequence, Union
+from typing import Any, Callable, Optional, Sequence, Type, Union
 
 import numpy as np
 import pandas as pd
@@ -132,7 +132,7 @@ def __init__(
         self.annotators = self._initialize_annotators(annotators_config)
         self.caching_path = caching_path
         self.df_annotations = None
-        self.load_()
+        self.reinitialize_cache_()
 
     ### Helper properties to make it easier to inherit from this class ###
     @property
@@ -141,6 +141,10 @@ def SingleAnnotator(self):
 
     #########################################
 
+    @property
+    def annotator_name(self) -> str:
+        return Path(self.annotators_config).parent.name
+
     def annotate_samples(
         self,
         all_outputs: utils.AnyData,
@@ -383,8 +387,7 @@ def _preprocess(self, to_annotate: utils.AnyData) -> pd.DataFrame:
         )
 
         if self.is_avoid_reannotations:
-            # merge the old annotations
-            df_to_annotate = self._merge_annotations(df_to_annotate, self.df_annotations)
+            df_to_annotate = self.apply_cached_annotations(df_to_annotate)
 
         # adds random noise => avoids annotating examples that will be noised out.
         if self.p_label_flip:
@@ -413,8 +416,13 @@ def _preprocess(self, to_annotate: utils.AnyData) -> pd.DataFrame:
 
         return df_to_annotate
 
+    def apply_cached_annotations(self, df_to_annotate: pd.DataFrame) -> pd.DataFrame:
+        """annotate examples with cached annotations"""
+        df_to_annotate = self._merge_annotations(df_to_annotate, self.df_annotations)
+        return df_to_annotate
+
     def _initialize_annotators(
-        self, annotators_config: Union[utils.AnyPath, dict[str, dict[str, Any]]]
+        self, annotators_config: Union[utils.AnyPath, dict[str, dict[str, Type["SinglePairwiseAnnotator"]]]]
     ) -> dict[str, Callable]:
         """Load all the configs and prompts if necessary."""
         annotators_config = utils.load_configs(annotators_config)
@@ -471,14 +479,7 @@ def _postprocess_and_store_(
         all_keys_to_keep = self.all_keys + ["preference"] + other_keys_to_keep
         df_annotated_to_store = df_annotated_to_store[all_keys_to_keep]
 
-        if self.df_annotations is None:
-            df_annotations = df_annotated_to_store
-        else:
-            df_annotations = pd.concat([self.df_annotations, df_annotated_to_store], axis=0, ignore_index=True)
-
-        self.df_annotations = df_annotations.drop_duplicates(subset=self.all_keys, keep="last")
-
-        self.save()
+        self.store_annotations_(df_annotated_to_store)
 
         if self.is_store_missing_preferences:
             # put back np.nan
@@ -495,6 +496,17 @@
 
         return annotated
 
+    def store_annotations_(self, df_annotated_to_store: pd.DataFrame):
+        """Store annotation in memory and on disk"""
+        if self.df_annotations is None:
+            df_annotations = df_annotated_to_store
+        else:
+            df_annotations = pd.concat([self.df_annotations, df_annotated_to_store], axis=0, ignore_index=True)
+
+        self.df_annotations = df_annotations.drop_duplicates(subset=self.all_keys, keep="last")
+
+        self.save()
+
     def save(self, path: Optional[utils.AnyPath] = None):
         """Save the annotations to json."""
         path = path or self.caching_path
@@ -514,6 +526,9 @@ def _refresh_annotations_(self):
             [self.df_annotations, curr_df_annotations], axis=0, ignore_index=True
         ).drop_duplicates(subset=self.all_keys, keep="last")
 
+    def reinitialize_cache_(self):
+        self.load_()
+
     def load_(self, path: Optional[utils.AnyPath] = None):
         """Load all the annotations from json."""
         path = path or self.caching_path
@@ -525,25 +540,26 @@
 
     def _merge_annotations(self, df_to_annotate: pd.DataFrame, df_partially_annotated: pd.DataFrame) -> pd.DataFrame:
         """Merge (partial) annotations with the original df to keep the same order and avoid duplicates annotations."""
+
         if df_partially_annotated is None or df_partially_annotated.empty:
             return df_to_annotate
 
         other_keys_to_keep = [c for c in self.other_keys_to_keep if c in df_partially_annotated.columns]
 
-        kwargrs = dict(
+        kwargs = dict(
             on=self.all_keys,
             how="left",
             suffixes=("_old", "_new"),
         )
         try:
             df_to_annotate = df_to_annotate.merge(
-                df_partially_annotated[self.all_keys + ["preference"] + other_keys_to_keep], **kwargrs
+                df_partially_annotated[self.all_keys + ["preference"] + other_keys_to_keep], **kwargs
             )
         except ValueError:
             # can have merging issues if columns have different dtypes
             df_partially_annotated = df_partially_annotated.astype({k: str for k in self.all_keys})
             df_to_annotate = df_to_annotate.astype({k: str for k in self.all_keys}).merge(
-                df_partially_annotated[self.all_keys + ["preference"] + other_keys_to_keep], **kwargrs
+                df_partially_annotated[self.all_keys + ["preference"] + other_keys_to_keep], **kwargs
             )
 
         # if columns were in both dataframes, try to merge them
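Taken together, reinitialize_cache_, apply_cached_annotations, and store_annotations_ put every cache read and write behind an overridable method, which is what makes caching to a DB possible without touching the annotation logic. A sketch of such a subclass, assuming SQLite and a hypothetical "annotations" table whose columns match self.all_keys plus "preference"; a complete implementation would also need to revisit save, load_, and _refresh_annotations_:

import sqlite3

import pandas as pd

from alpaca_eval.annotators import PairwiseAnnotator


class SQLitePairwiseAnnotator(PairwiseAnnotator):
    """Hypothetical subclass that caches annotations in SQLite instead of a JSON file."""

    def __init__(self, *args, db_path: str = "annotations.db", **kwargs):
        # must be set before super().__init__, which calls reinitialize_cache_()
        self.db_path = db_path
        super().__init__(*args, **kwargs)

    def reinitialize_cache_(self):
        """Load the cached annotations from the DB instead of the JSON file."""
        with sqlite3.connect(self.db_path) as conn:
            exists = conn.execute(
                "SELECT name FROM sqlite_master WHERE type='table' AND name='annotations'"
            ).fetchone()
            self.df_annotations = pd.read_sql("SELECT * FROM annotations", conn) if exists else None

    def store_annotations_(self, df_annotated_to_store: pd.DataFrame):
        """Deduplicate in memory as the base class does, then persist to the DB."""
        if self.df_annotations is None:
            df_annotations = df_annotated_to_store
        else:
            df_annotations = pd.concat([self.df_annotations, df_annotated_to_store], axis=0, ignore_index=True)
        self.df_annotations = df_annotations.drop_duplicates(subset=self.all_keys, keep="last")
        with sqlite3.connect(self.db_path) as conn:
            self.df_annotations.to_sql("annotations", conn, if_exists="replace", index=False)

Because the base apply_cached_annotations only reads self.df_annotations, overriding the load and store paths is enough for the preprocessing cache to keep working unchanged.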

src/alpaca_eval/main.py (7 additions, 1 deletion)
@@ -12,6 +12,8 @@
 CUR_DIR = Path(__file__).parent
 DEFAULT_CONFIGS = "alpaca_eval_gpt4"
 
+__all__ = ["evaluate", "evaluate_from_model", "analyze_evaluators", "make_leaderboard"]
+
 
 def evaluate(
     model_outputs: Optional[Union[AnyPath, AnyData, Callable]] = None,
@@ -29,6 +31,7 @@ def evaluate(
     is_cache_leaderboard: Optional[bool] = None,
     max_instances: Optional[int] = None,
     annotation_kwargs: Optional[dict[str, Any]] = None,
+    Annotator=annotators.PairwiseAnnotator,
     **annotator_kwargs,
 ):
     """Evaluate a model based on its outputs. This is the default entrypoint if no command is specified.
@@ -94,6 +97,9 @@ def evaluate(
     annotation_kwargs : dict, optional
         Additional arguments to pass to `PairwiseAnnotator.annotate_head2head`.
+
+    Annotator : class, optional
+        The annotator class to use.
     annotator_kwargs :
         Additional arguments to pass to `PairwiseAnnotator`.
     """
@@ -122,7 +128,7 @@ def evaluate(
         model_outputs = model_outputs[:max_instances]
         reference_outputs = reference_outputs[:max_instances]
 
-    annotator = annotators.PairwiseAnnotator(annotators_config=annotators_config, **annotator_kwargs)
+    annotator = Annotator(annotators_config=annotators_config, **annotator_kwargs)
     annotations = annotator.annotate_head2head(
         outputs_1=reference_outputs, outputs_2=model_outputs, **annotation_kwargs
     )
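The new Annotator argument is what lets a custom cache layer plug into the standard entrypoint. A usage sketch, reusing the hypothetical SQLitePairwiseAnnotator from above (the outputs path is a placeholder; "alpaca_eval_gpt4" is the default per DEFAULT_CONFIGS):

from alpaca_eval import evaluate

evaluate(
    model_outputs="outputs.json",          # placeholder path
    annotators_config="alpaca_eval_gpt4",  # matches DEFAULT_CONFIGS in main.py
    Annotator=SQLitePairwiseAnnotator,     # sketched above
    db_path="annotations.db",              # forwarded to the subclass via **annotator_kwargs
)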