diff --git a/pyproject.toml b/pyproject.toml index 2c3a76f5..25caf31f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -109,6 +109,7 @@ multilingual = [ "jieba", # for chinese tokenizer "pyvi", # for vietnamese tokenizer ] +math = ["latex2sympy2_extended>=0.8.0"] [project.urls] Homepage = "https://github.com/huggingface/lighteval" diff --git a/src/lighteval/metrics/dynamic_metrics.py b/src/lighteval/metrics/dynamic_metrics.py index 11aef25c..e83c9ea6 100644 --- a/src/lighteval/metrics/dynamic_metrics.py +++ b/src/lighteval/metrics/dynamic_metrics.py @@ -38,7 +38,7 @@ LogProbTokenNorm, get_multilingual_normalizer, ) -from lighteval.metrics.utils.extraction_utils import ( # noqa: F401 +from lighteval.metrics.utils.extractive_match_utils import ( # noqa: F401 ExprExtractionConfig, ExtractionTarget, IndicesExtractionConfig, diff --git a/src/lighteval/metrics/utils/extraction_utils.py b/src/lighteval/metrics/utils/extractive_match_utils.py similarity index 97% rename from src/lighteval/metrics/utils/extraction_utils.py rename to src/lighteval/metrics/utils/extractive_match_utils.py index 2cb3ce85..1a0d6c94 100644 --- a/src/lighteval/metrics/utils/extraction_utils.py +++ b/src/lighteval/metrics/utils/extractive_match_utils.py @@ -27,7 +27,6 @@ from typing import Literal, Sequence import sympy -from latex2sympy2_extended.latex2sympy2 import NormalizationConfig, convert_to_pct, latex2sympy, normalize_latex from sympy import Basic, MatrixBase, Number from sympy.parsing import parse_expr @@ -35,6 +34,7 @@ from lighteval.tasks.requests import Doc from lighteval.tasks.templates.utils.formulation import ChoicePrefix, get_prefix from lighteval.tasks.templates.utils.translation_literals import TRANSLATION_LITERALS +from lighteval.utils.imports import requires_latex2sympy2_extended from lighteval.utils.language import Language from lighteval.utils.timeout import timeout @@ -282,7 +282,10 @@ def get_target_type_order(target_type: ExtractionTarget) -> int: # Small cache, to 
catche repeated calls invalid parsing @lru_cache(maxsize=20) @timeout(timeout_seconds=5) +@requires_latex2sympy2_extended def parse_latex_with_timeout(latex: str): + from latex2sympy2_extended.latex2sympy2 import latex2sympy + return latex2sympy(latex, is_real=not should_treat_as_complex(latex), convert_degrees=False) @@ -324,9 +327,17 @@ def extract_expr(match: re.Match) -> tuple[str | sympy.Expr | None, str]: return None, expr +def convert_to_pct(number: Number): + return sympy.Mul(number, sympy.Rational(1, 100), evaluate=False) + + @lru_cache(maxsize=1000) +@timeout(timeout_seconds=5) +@requires_latex2sympy2_extended def extract_latex(match: re.Match) -> tuple[sympy.Expr | str | None, str]: - _, latex = next((val for name, val in match.groupdict().items() if name.startswith("latex") and val), "") + from latex2sympy2_extended.latex2sympy2 import NormalizationConfig, normalize_latex + + latex = next((val for name, val in match.groupdict().items() if name.startswith("latex") and val), "") is_percentage = True if match.group("percent") else False normalized_latex = normalize_latex( diff --git a/src/lighteval/utils/imports.py b/src/lighteval/utils/imports.py index c8fb2ce7..9b92adce 100644 --- a/src/lighteval/utils/imports.py +++ b/src/lighteval/utils/imports.py @@ -123,3 +123,20 @@ def can_load_stanza_tokenizer() -> bool: NO_STANZA_TOKENIZER_ERROR_MSG = "You are trying to load a stanza tokenizer, for which you need `stanza`, which is not available in your environment. Please install it using `pip install lighteval[multilingual]`." 
# The availability check runs when the decorated function is called, not when
# it is defined, and a successful check is remembered so it is not repeated.
def requires_latex2sympy2_extended(func):
    """Guard `func` behind the optional `latex2sympy2_extended` package.

    The returned wrapper looks the package up with `importlib.util.find_spec`
    before delegating to `func`. Once a lookup has succeeded, later calls skip
    it. A failed lookup is not remembered, so it is retried on the next call.

    Args:
        func: The callable that needs `latex2sympy2_extended` at call time.

    Returns:
        A wrapper with the same call signature and metadata as `func`.

    Raises:
        ImportError: Raised by the wrapper (with
            `NO_LATEX2SYMPY2_EXTENDED_ERROR_MSG`) when the package cannot be
            found.
    """
    # Function-scope import so this block does not depend on the module's
    # top-level import list.
    import functools

    checked_import = False

    # Preserve __name__/__doc__/__wrapped__ so decorators stacked on top of
    # this one, and tracebacks, still see the original function's identity.
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        nonlocal checked_import
        if not checked_import:
            if importlib.util.find_spec("latex2sympy2_extended") is None:
                raise ImportError(NO_LATEX2SYMPY2_EXTENDED_ERROR_MSG)
            checked_import = True
        return func(*args, **kwargs)

    return wrapper


# Resolved when the wrapper runs, so defining it after the decorator is safe.
NO_LATEX2SYMPY2_EXTENDED_ERROR_MSG = "You are trying to parse latex expressions, for which you need `latex2sympy2_extended`, which is not available in your environment. Please install it using `pip install lighteval[math]`."