Skip to content

Commit

Permalink
better docstring
Browse files Browse the repository at this point in the history
  • Loading branch information
hynky1999 committed Jan 11, 2025
1 parent eeaceaf commit 4f1f85a
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 8 deletions.
30 changes: 24 additions & 6 deletions src/lighteval/metrics/dynamic_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
# SOFTWARE.

import logging
from typing import Callable, Literal
from typing import Callable, Literal, Sequence

import numpy as np

Expand Down Expand Up @@ -187,21 +187,39 @@ def multilingual_quasi_exact_match_metric(

def multilingual_extractive_match_metric(
language: Language,
gold_extraction_target: tuple[ExtractionTarget] = (ExprExtractionConfig(),),
pred_extraction_target: tuple[ExtractionTarget] = (ExprExtractionConfig(),),
gold_extraction_target: Sequence[ExtractionTarget] = (ExprExtractionConfig(),),
pred_extraction_target: Sequence[ExtractionTarget] = (ExprExtractionConfig(),),
aggregation_function: Callable[[list[float]], float] = max,
fallback_mode: Literal["no_fallback", "first_match"] = "first_match",
precision: int = 6,
) -> SampleLevelMetric:
"""
"""Creates a language-aware extractive match metric that extracts answers from the model's output.
Known issues:
- If the task is to simplify an expression, the metric might overestimate the accuracy. This is because if the model doesn't output any anchor for the extraction (e.g. "final answer is ..."),
it's possible that the extracted prediction will be the expression to simplify. Because we do simplifications ourselves, it can thus happen that sympy will correctly simplify the expression,
thus it will match gold, despite the model not doing anything. You can try to limit this issue by setting extraction_mode to "first_match" instead of "first_extraction", but this will likely incur
too low recall on correct predictions.
thus it will match gold, despite model not doing anything. PRs to fix this are welcome.
- There is currently no StringExtractionConfig, so if the gold is \boxed{\text{Friday}} and model outputs Friday it will not match, because nothing will be extracted.
Args:
language: Language
The language of the samples.
gold_extraction_target: Sequence[ExtractionTarget]
Extraction targets to use for gold answers. Defaults to extracting simple math expressions.
pred_extraction_target: Sequence[ExtractionTarget]
Extraction targets to use for predictions. Defaults to extracting simple math expressions.
aggregation_function: Callable[[list[float]], float]
Function to aggregate scores when multiple golds/predictions are present. Defaults to max.
fallback_mode: Literal["no_fallback", "first_match"]
How to perform extraction. Defaults to "first_match".
- "no_fallback": Only use first successfully parsed matches
- "first_match": Use the first successfully parsed match + the first match regardless of parsing success
precision: int
Number of decimal places to use when comparing numerical values. Defaults to 6.
Returns:
A sample level metric that extracts and compares mathematical expressions.
"""

Expand Down
4 changes: 2 additions & 2 deletions src/lighteval/metrics/utils/extraction_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from dataclasses import dataclass
from functools import lru_cache
from itertools import groupby
from typing import Literal
from typing import Literal, Sequence

import sympy
from latex2sympy2_extended.latex2sympy2 import NormalizationConfig, convert_to_pct, latex2sympy, normalize_latex
Expand Down Expand Up @@ -253,7 +253,7 @@ def lazy_indices_regex(


def get_extraction_regexes(
formatted_doc: Doc, target_types: tuple[ExtractionTarget], language: Language
formatted_doc: Doc, target_types: Sequence[ExtractionTarget], language: Language
) -> list[tuple[list[tuple[re.Pattern[str], int]], ExtractionTarget]]:
extraction_regexes: list[tuple[list[tuple[re.Pattern[str], int]], ExtractionTarget]] = [
(lazy_latex_regex(target_type, language), target_type)
Expand Down

0 comments on commit 4f1f85a

Please sign in to comment.