better docstring

huggingface · Jan 11, 2025 · 4f1f85a · 4f1f85a
1 parent eeaceaf
commit 4f1f85a
Show file tree

Hide file tree

Showing 2 changed files with 26 additions and 8 deletions.
diff --git a/src/lighteval/metrics/dynamic_metrics.py b/src/lighteval/metrics/dynamic_metrics.py
@@ -21,7 +21,7 @@
 # SOFTWARE.
 
 import logging
-from typing import Callable, Literal
+from typing import Callable, Literal, Sequence
 
 import numpy as np
 
@@ -187,21 +187,39 @@ def multilingual_quasi_exact_match_metric(
 
 def multilingual_extractive_match_metric(
     language: Language,
-    gold_extraction_target: tuple[ExtractionTarget] = (ExprExtractionConfig(),),
-    pred_extraction_target: tuple[ExtractionTarget] = (ExprExtractionConfig(),),
+    gold_extraction_target: Sequence[ExtractionTarget] = (ExprExtractionConfig(),),
+    pred_extraction_target: Sequence[ExtractionTarget] = (ExprExtractionConfig(),),
     aggregation_function: Callable[[list[float]], float] = max,
     fallback_mode: Literal["no_fallback", "first_match"] = "first_match",
     precision: int = 6,
 ) -> SampleLevelMetric:
-    """
+    """Creates a language-aware extractive match metric that extracts answers from the model's output.
 
     Known issues:
     - If the task is to simplify an expression, the metric might overestimate the accuracy. This is because if the model doesn't output any anchor for the extraction (e.g final answer is..),
         it's possible that the extracted prediction will be the expression to simplify. Because we do simplifications ourselves, it can thus happen that sympy will correctly simplify the expression,
-        thus it will match gold, despite model not doing anything. You can try to limit this issue by setting extraction_mode to "first_match" instead of "first_extraction", but this will likely incurr
-        too low recall on correct predictions.
+        thus it will match gold, despite the model not doing anything. PRs to fix this are welcome.
+
     - There is currently no StringExtractionConfig, so if the gold is \boxed{\text{Friday}} and model outputs Friday it will not match, because nothing will be extracted.
 
+    Args:
+        language: Language
+            The language of the samples.
+        gold_extraction_target: Sequence[ExtractionTarget]
+            Extraction targets to use for gold answers. Defaults to extracting simple math expressions.
+        pred_extraction_target: Sequence[ExtractionTarget]
+            Extraction targets to use for predictions. Defaults to extracting simple math expressions.
+        aggregation_function: Callable[[list[float]], float]
+            Function to aggregate scores when multiple golds/predictions are present. Defaults to max.
+        fallback_mode: Literal["no_fallback", "first_match"]
+            How to perform extraction. Defaults to "first_match".
+            - "no_fallback": Only use first successfully parsed matches
+            - "first_match": Use the first successfully parsed match, plus the first match regardless of parsing success
+        precision: int
+            Number of decimal places to use when comparing numerical values. Defaults to 6.
+
+    Returns:
+        A sample level metric that extracts and compares mathematical expressions.
 
     """
 

diff --git a/src/lighteval/metrics/utils/extraction_utils.py b/src/lighteval/metrics/utils/extraction_utils.py
@@ -24,7 +24,7 @@
 from dataclasses import dataclass
 from functools import lru_cache
 from itertools import groupby
-from typing import Literal
+from typing import Literal, Sequence
 
 import sympy
 from latex2sympy2_extended.latex2sympy2 import NormalizationConfig, convert_to_pct, latex2sympy, normalize_latex
@@ -253,7 +253,7 @@ def lazy_indices_regex(
 
 
 def get_extraction_regexes(
-    formatted_doc: Doc, target_types: tuple[ExtractionTarget], language: Language
+    formatted_doc: Doc, target_types: Sequence[ExtractionTarget], language: Language
 ) -> list[tuple[list[tuple[re.Pattern[str], int]], ExtractionTarget]]:
     extraction_regexes: list[tuple[list[tuple[re.Pattern[str], int]], ExtractionTarget]] = [
         (lazy_latex_regex(target_type, language), target_type)