[ENH] make it easier to cache to a DB (tatsu-lab#73)
* [GITIGNORE] rm jsons

* load_dotenv

* load_dotenv

* [ENH] make evaluator easier to inherit

* nit

* changes from PR

* typo
YannDubs authored Jul 10, 2023
1 parent 50b77ab commit 0eb723b
Showing 5 changed files with 58 additions and 19 deletions.
.gitignore (3 additions, 0 deletions)
@@ -3,6 +3,9 @@ notebooks/
 example/
 .DS_Store
 .env
+src/**/*.json
+results
+./*.json
 
 # Byte-compiled / optimized / DLL files
 __pycache__/

setup.py (9 additions, 1 deletion)
@@ -11,7 +11,15 @@
 else:
     raise RuntimeError("Unable to find `__version__`.")
 
-PACKAGES_DEV = ["pre-commit>=3.2.0", "black>=23.1.0", "isort", "pytest", "pytest-mock", "pytest-skip-slow"]
+PACKAGES_DEV = [
+    "pre-commit>=3.2.0",
+    "black>=23.1.0",
+    "isort",
+    "pytest",
+    "pytest-mock",
+    "pytest-skip-slow",
+    "python-dotenv",
+]
 PACKAGES_ANALYSIS = ["seaborn", "matplotlib", "jupyterlab"]
 PACKAGES_LOCAL = ["accelerate", "transformers", "bitsandbytes", "xformers", "peft", "optimum", "scipy", "einops"]
 PACKAGES_ALL_API = ["anthropic>=0.3.0", "huggingface_hub", "cohere"]
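The only dependency change is python-dotenv joining the dev packages; assuming this setup.py maps PACKAGES_DEV to a "dev" extra as usual (the extras name is an assumption), it would be pulled in by something like pip install -e ".[dev]".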

src/alpaca_eval/__init__.py (7 additions, 1 deletion)
@@ -1 +1,7 @@
-__version__ = '0.2.0'
+from dotenv import load_dotenv
+
+load_dotenv()
+
+from .main import *  # noqa
+
+__version__ = "0.2.0"

src/alpaca_eval/annotators/pairwise_evaluator.py (32 additions, 16 deletions)
@@ -2,7 +2,7 @@
 import os
 from functools import partial
 from pathlib import Path
-from typing import Any, Callable, Optional, Sequence, Union
+from typing import Any, Callable, Optional, Sequence, Type, Union
 
 import numpy as np
 import pandas as pd
@@ -132,7 +132,7 @@ def __init__(
         self.annotators = self._initialize_annotators(annotators_config)
         self.caching_path = caching_path
         self.df_annotations = None
-        self.load_()
+        self.reinitialize_cache_()
 
     ### Helper properties to make it easier to inherit from this class ###
     @property
@@ -141,6 +141,10 @@ def SingleAnnotator(self):
 
     #########################################
 
+    @property
+    def annotator_name(self) -> str:
+        return Path(self.annotators_config).parent.name
+
     def annotate_samples(
         self,
         all_outputs: utils.AnyData,
@@ -383,8 +387,7 @@ def _preprocess(self, to_annotate: utils.AnyData) -> pd.DataFrame:
         )
 
         if self.is_avoid_reannotations:
-            # merge the old annotations
-            df_to_annotate = self._merge_annotations(df_to_annotate, self.df_annotations)
+            df_to_annotate = self.apply_cached_annotations(df_to_annotate)
 
         # adds random noise => avoids annotating examples that will be noised out.
         if self.p_label_flip:
@@ -413,8 +416,13 @@ def _preprocess(self, to_annotate: utils.AnyData) -> pd.DataFrame:
 
         return df_to_annotate
 
+    def apply_cached_annotations(self, df_to_annotate: pd.DataFrame) -> pd.DataFrame:
+        """annotate examples with cached annotations"""
+        df_to_annotate = self._merge_annotations(df_to_annotate, self.df_annotations)
+        return df_to_annotate
+
     def _initialize_annotators(
-        self, annotators_config: Union[utils.AnyPath, dict[str, dict[str, Any]]]
+        self, annotators_config: Union[utils.AnyPath, dict[str, dict[str, Type["SinglePairwiseAnnotator"]]]]
     ) -> dict[str, Callable]:
         """Load all the configs and prompts if necessary."""
         annotators_config = utils.load_configs(annotators_config)
@@ -471,14 +479,7 @@ def _postprocess_and_store_(
         all_keys_to_keep = self.all_keys + ["preference"] + other_keys_to_keep
         df_annotated_to_store = df_annotated_to_store[all_keys_to_keep]
 
-        if self.df_annotations is None:
-            df_annotations = df_annotated_to_store
-        else:
-            df_annotations = pd.concat([self.df_annotations, df_annotated_to_store], axis=0, ignore_index=True)
-
-        self.df_annotations = df_annotations.drop_duplicates(subset=self.all_keys, keep="last")
-
-        self.save()
+        self.store_annotations_(df_annotated_to_store)
 
         if self.is_store_missing_preferences:
             # put back np.nan
@@ -495,6 +496,17 @@
 
         return annotated
 
+    def store_annotations_(self, df_annotated_to_store: pd.DataFrame):
+        """Store annotation in memory and on disk"""
+        if self.df_annotations is None:
+            df_annotations = df_annotated_to_store
+        else:
+            df_annotations = pd.concat([self.df_annotations, df_annotated_to_store], axis=0, ignore_index=True)
+
+        self.df_annotations = df_annotations.drop_duplicates(subset=self.all_keys, keep="last")
+
+        self.save()
+
     def save(self, path: Optional[utils.AnyPath] = None):
         """Save the annotations to json."""
         path = path or self.caching_path
@@ -514,6 +526,9 @@ def _refresh_annotations_(self):
             [self.df_annotations, curr_df_annotations], axis=0, ignore_index=True
         ).drop_duplicates(subset=self.all_keys, keep="last")
 
+    def reinitialize_cache_(self):
+        self.load_()
+
     def load_(self, path: Optional[utils.AnyPath] = None):
         """Load all the annotations from json."""
         path = path or self.caching_path
@@ -525,25 +540,26 @@
 
     def _merge_annotations(self, df_to_annotate: pd.DataFrame, df_partially_annotated: pd.DataFrame) -> pd.DataFrame:
         """Merge (partial) annotations with the original df to keep the same order and avoid duplicates annotations."""
+
         if df_partially_annotated is None or df_partially_annotated.empty:
             return df_to_annotate
 
         other_keys_to_keep = [c for c in self.other_keys_to_keep if c in df_partially_annotated.columns]
 
-        kwargrs = dict(
+        kwargs = dict(
             on=self.all_keys,
             how="left",
             suffixes=("_old", "_new"),
         )
         try:
             df_to_annotate = df_to_annotate.merge(
-                df_partially_annotated[self.all_keys + ["preference"] + other_keys_to_keep], **kwargrs
+                df_partially_annotated[self.all_keys + ["preference"] + other_keys_to_keep], **kwargs
             )
         except ValueError:
             # can have merging issues if columns have different dtypes
             df_partially_annotated = df_partially_annotated.astype({k: str for k in self.all_keys})
             df_to_annotate = df_to_annotate.astype({k: str for k in self.all_keys}).merge(
-                df_partially_annotated[self.all_keys + ["preference"] + other_keys_to_keep], **kwargrs
+                df_partially_annotated[self.all_keys + ["preference"] + other_keys_to_keep], **kwargs
             )
 
         # if columns were in both dataframes, try to merge them
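Taken together, reinitialize_cache_, apply_cached_annotations, and store_annotations_ put every cache read and write behind an overridable method, which is what makes caching to a DB possible without touching the annotation logic. A sketch of such a subclass, assuming SQLite and a hypothetical "annotations" table whose columns match self.all_keys plus "preference"; a complete implementation would also need to revisit save, load_, and _refresh_annotations_:

import sqlite3

import pandas as pd

from alpaca_eval.annotators import PairwiseAnnotator


class SQLitePairwiseAnnotator(PairwiseAnnotator):
    """Hypothetical subclass that caches annotations in SQLite instead of a JSON file."""

    def __init__(self, *args, db_path: str = "annotations.db", **kwargs):
        # must be set before super().__init__, which calls reinitialize_cache_()
        self.db_path = db_path
        super().__init__(*args, **kwargs)

    def reinitialize_cache_(self):
        """Load the cached annotations from the DB instead of the JSON file."""
        with sqlite3.connect(self.db_path) as conn:
            exists = conn.execute(
                "SELECT name FROM sqlite_master WHERE type='table' AND name='annotations'"
            ).fetchone()
            self.df_annotations = pd.read_sql("SELECT * FROM annotations", conn) if exists else None

    def store_annotations_(self, df_annotated_to_store: pd.DataFrame):
        """Deduplicate in memory as the base class does, then persist to the DB."""
        if self.df_annotations is None:
            df_annotations = df_annotated_to_store
        else:
            df_annotations = pd.concat([self.df_annotations, df_annotated_to_store], axis=0, ignore_index=True)
        self.df_annotations = df_annotations.drop_duplicates(subset=self.all_keys, keep="last")
        with sqlite3.connect(self.db_path) as conn:
            self.df_annotations.to_sql("annotations", conn, if_exists="replace", index=False)

Because the base apply_cached_annotations only reads self.df_annotations, overriding the load and store paths is enough for the preprocessing cache to keep working unchanged.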

src/alpaca_eval/main.py (7 additions, 1 deletion)
@@ -12,6 +12,8 @@
 CUR_DIR = Path(__file__).parent
 DEFAULT_CONFIGS = "alpaca_eval_gpt4"
 
+__all__ = ["evaluate", "evaluate_from_model", "analyze_evaluators", "make_leaderboard"]
+
 
 def evaluate(
     model_outputs: Optional[Union[AnyPath, AnyData, Callable]] = None,
@@ -29,6 +31,7 @@ def evaluate(
     is_cache_leaderboard: Optional[bool] = None,
     max_instances: Optional[int] = None,
     annotation_kwargs: Optional[dict[str, Any]] = None,
+    Annotator=annotators.PairwiseAnnotator,
     **annotator_kwargs,
 ):
     """Evaluate a model based on its outputs. This is the default entrypoint if no command is specified.
@@ -94,6 +97,9 @@ def evaluate(
     annotation_kwargs : dict, optional
         Additional arguments to pass to `PairwiseAnnotator.annotate_head2head`.
+
+    Annotator : class, optional
+        The annotator class to use.
     annotator_kwargs :
         Additional arguments to pass to `PairwiseAnnotator`.
     """
@@ -122,7 +128,7 @@ def evaluate(
         model_outputs = model_outputs[:max_instances]
         reference_outputs = reference_outputs[:max_instances]
 
-    annotator = annotators.PairwiseAnnotator(annotators_config=annotators_config, **annotator_kwargs)
+    annotator = Annotator(annotators_config=annotators_config, **annotator_kwargs)
     annotations = annotator.annotate_head2head(
         outputs_1=reference_outputs, outputs_2=model_outputs, **annotation_kwargs
     )
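The new Annotator argument is what lets a custom cache layer plug into the standard entrypoint. A usage sketch, reusing the hypothetical SQLitePairwiseAnnotator from above (the outputs path is a placeholder; "alpaca_eval_gpt4" is the default per DEFAULT_CONFIGS):

from alpaca_eval import evaluate

evaluate(
    model_outputs="outputs.json",          # placeholder path
    annotators_config="alpaca_eval_gpt4",  # matches DEFAULT_CONFIGS in main.py
    Annotator=SQLitePairwiseAnnotator,     # sketched above
    db_path="annotations.db",              # forwarded to the subclass via **annotator_kwargs
)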