diff --git a/src/lighteval/tasks/extended/mix_eval/main.py b/src/lighteval/tasks/extended/mix_eval/main.py
index 8684e910c..eaa58f2a5 100644
--- a/src/lighteval/tasks/extended/mix_eval/main.py
+++ b/src/lighteval/tasks/extended/mix_eval/main.py
@@ -20,6 +20,7 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
+import logging
import re
import numpy as np
@@ -37,6 +38,9 @@
from lighteval.tasks.requests import Doc
+logger = logging.getLogger(__name__)
+
+
def mixeval_freeform_prompt(line, task_name: str = ""):
prompt = construct_prompt_freeform(line)
return Doc(
@@ -71,19 +75,30 @@ def mixeval_multichoice_prompt(line, task_name: str = ""):
def process_judge_response(x):
- search = re.search(r"\s(\d)\s", x)
- return int(search.group(1)) if search else 0
+    try:
+        search = re.search(r"\s(\d)\s", x)  # first single digit with whitespace on both sides
+    except TypeError as e:  # re.search rejects non-string input; score it 0 instead of crashing
+        logger.warning(f"Error processing judge response for flow: {e}")
+        return 0
+    return int(search.group(1)) if search else 0  # no digit found -> 0
def process_judge_response_multichoice_gpt(x):
- search = re.search(r"\[\[([01])\]\]", x)
- return int(search.group(1)) if search else 0
+    try:
+        search = re.search(r"\[\[([01])\]\]", x)  # matches a literal [[0]] or [[1]]
+    except TypeError as e:  # re.search rejects non-string input; score it 0 instead of crashing
+        logger.warning(f"Error processing judge response for multichoice GPT: {e}")
+        return 0
+    return int(search.group(1)) if search else 0  # no verdict found -> 0
def process_judge_response_freeform_gpt(x):
- search = re.search(r"\[\[(\d.\d)\]\]", x)
- answer = float(search.group(1) if search else 0)
- return answer
+    try:
+        search = re.search(r"\[\[(\d\.\d)\]\]", x)  # dot escaped: only digit.digit (e.g. [[0.5]]) matches
+        return float(search.group(1)) if search else 0.0  # float on no match, as the removed code returned
+    except (TypeError, ValueError) as e:  # non-string input or a capture float() cannot parse
+        logger.warning(f"Error processing judge response for freeform GPT: {e}")
+        return 0.0
llm_judge_mixeval_multichoice_flow_judge = SampleLevelMetricGrouping(