Skip to content

Commit

Permalink
Bug fix extractive match (#540)
Browse files (browse the repository at this point in the history)
* update extractive match to reflect the newest math-verify

* revert symbols, improve sets handling

* rm todo

* fmt + remove empty `except` blocks + bump l2s (latex2sympy2_extended)

* fmt

* docstring

* fix boxed matching, bump the broken latex2sympy version

* allow more separators
  • Loading branch information
hynky1999 authored Feb 6, 2025
1 parent f8405ee commit 3c9b0c9
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 8 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ multilingual = [
"jieba", # for chinese tokenizer
"pyvi", # for vietnamese tokenizer
]
math = ["latex2sympy2_extended==1.0.4"]
math = ["latex2sympy2_extended==1.0.6"]

[project.urls]
Homepage = "https://github.com/huggingface/lighteval"
Expand Down
9 changes: 3 additions & 6 deletions src/lighteval/metrics/utils/extractive_match_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,10 +215,7 @@ def make_latex_env_pattern(prefix: str = "", context: Literal["boxed", "plain"]
rf"(?<!\\)\\\((?P<{prefix}latexInlineParenthesis>{inline_content_parenthesis})(?<!\\)\\\)",
rf"\s\[(?P<{prefix}latexInlineBracket>{inline_content_bracket})\]\s",
]
if context == "boxed":
# allow also matching plain boxed
patterns.append(rf"(?P<{prefix}latexBoxed>\\boxed{{.+}})")
elif context == "plain":
if context == "plain":
simple_number = r"-?\d+(?:[.,]\d+)?"
patterns.append(rf"(?P<{prefix}latexFraction>-?\\frac{{{simple_number}}}{{{simple_number}}})")

Expand All @@ -237,7 +234,7 @@ def lazy_latex_regex(latex_config: LatexExtractionConfig, language: Language) ->
and_word = translation_literal.and_word
or_word = translation_literal.or_word
next_groups = "".join(
[rf"(?:\s*(?:{and_word}|{or_word})\s*{make_latex_env_pattern(f'next{i}_')})?" for i in range(1, 6)]
[rf"(?:\s*(?:{and_word}|{or_word}|,)\s*{make_latex_env_pattern(f'next{i}_')})?" for i in range(1, 6)]
)

latex_envs_re = rf"(?:{first_latex_group}{next_groups})"
Expand Down Expand Up @@ -269,7 +266,7 @@ def lazy_latex_regex(latex_config: LatexExtractionConfig, language: Language) ->
latex_re_boxed = make_latex_env_pattern(prefix="first_", context="boxed")
next_groups = "".join(
[
rf"(?:\s*(?:{and_word}|{or_word})\s*{make_latex_env_pattern(f'next{i}_', context='boxed')})?"
rf"(?:\s*(?:{and_word}|{or_word}|,)\s*{make_latex_env_pattern(f'next{i}_', context='boxed')})?"
for i in range(1, 6)
]
)
Expand Down
12 changes: 11 additions & 1 deletion tests/metrics/test_extractive_match.py
Original file line number Diff line number Diff line change
Expand Up @@ -1035,7 +1035,7 @@ def test_math_extraction_additional_cases(gold, pred, expected):
),
(
r"$(2,1),(1,2),(-1,-20),(-20,-1)$",
r"solutions are:\n\n\\[\n\\boxed{(1, 2)}, \\boxed{(2, 1)}, \\boxed{(-1, -20)}, \\boxed{(-20, -1)}\n\\]",
"solutions are:\n\n\\[\n\\boxed{(1, 2)}, \\boxed{(2, 1)}, \\boxed{(-1, -20)}, \\boxed{(-20, -1)}\n\\]",
1,
),
(
Expand Down Expand Up @@ -1121,6 +1121,16 @@ def test_math_extraction_additional_cases(gold, pred, expected):
r"$\boxed{10^{\frac{\sqrt{13} - 5}{6}}} \quad \text{and} \quad \boxed{10^{-\frac{5 + \sqrt{13}}{6}}}$",
1,
),
(
r"\boxed{1} and and and or thus but \boxed{2} and \boxed{3}",
r"$\boxed{2,3}$",
1,
),
(
r"\boxed{1} and and and or thus but \boxed{2} and \boxed{3}",
r"$\boxed{1,2,3}$",
0,
),
],
)
def test_math_numina_cases(gold, pred, expected):
Expand Down

0 comments on commit 3c9b0c9

Please sign in to comment.