From 2340a4fcb85a36123b1d3ed4494d8028e0817b03 Mon Sep 17 00:00:00 2001
From: Shaltiel Shmidman
Date: Sun, 25 Aug 2024 17:36:59 +0300
Subject: [PATCH] Fix retry with backoff

---
 src/lighteval/models/endpoint_model.py |  6 +++---
 src/lighteval/models/oai_model.py      |  2 +-
 src/lighteval/models/utils.py          | 11 +++++++----
 3 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/src/lighteval/models/endpoint_model.py b/src/lighteval/models/endpoint_model.py
index 9781ee478..a61836428 100644
--- a/src/lighteval/models/endpoint_model.py
+++ b/src/lighteval/models/endpoint_model.py
@@ -156,16 +156,16 @@ def _async_process_request(
     ) -> Coroutine[None, list[TextGenerationOutput], str]:
         # Todo: add an option to launch with conversational instead for chat prompts
         # https://huggingface.co/docs/huggingface_hub/v0.20.3/en/package_reference/inference_client#huggingface_hub.AsyncInferenceClient.conversational
-        generated_text = self.async_client.text_generation(
+        generated_text = retry_with_backoff(lambda: self.async_client.text_generation(
             prompt=context,
             details=True,
             decoder_input_details=True,
             max_new_tokens=max_tokens,
             stop_sequences=stop_tokens,
             # truncate=,
-        )
+        ))
 
-        return retry_with_backoff(generated_text)
+        return generated_text
 
     def _process_request(self, context: str, stop_tokens: list[str], max_tokens: int) -> TextGenerationOutput:
         # Todo: add an option to launch with conversational instead for chat prompts
diff --git a/src/lighteval/models/oai_model.py b/src/lighteval/models/oai_model.py
index 5885dc37b..fe46be11b 100644
--- a/src/lighteval/models/oai_model.py
+++ b/src/lighteval/models/oai_model.py
@@ -46,7 +46,7 @@ async def _async_process_request(
         self, context: str, stop_tokens: list[str], max_tokens: int
     ) -> Coroutine[None, TextGenerationOutput, str]:
         # Todo: add an option to launch with conversational instead for chat prompts
-        output = await retry_with_backoff(self.client.completions.create(
+        output = await retry_with_backoff(lambda: self.client.completions.create(
             model="/repository",
             prompt=context,
             max_tokens=max_tokens,
diff --git a/src/lighteval/models/utils.py b/src/lighteval/models/utils.py
index 6fa6ad601..76f8d840f 100644
--- a/src/lighteval/models/utils.py
+++ b/src/lighteval/models/utils.py
@@ -21,6 +21,7 @@
 # SOFTWARE.
 
 import asyncio
+import logging
 import os
 from itertools import islice
 from typing import Optional, Union
@@ -101,15 +102,17 @@ def batched(iterable, n):
         yield batch
 
 import random
-MAX_RETRIES = 5
+MAX_RETRIES = 15
 INITIAL_BACKOFF = 1
-async def retry_with_backoff(coro):
+async def retry_with_backoff(coro_fn):
     for attempt in range(MAX_RETRIES):
         try:
-            return await coro
+            return await coro_fn()
         except Exception as e:
             if attempt < MAX_RETRIES - 1:
-                backoff_time = INITIAL_BACKOFF * (2 ** attempt) + random.uniform(0, 1)
+                backoff_time = INITIAL_BACKOFF * (2 ** attempt) + random.uniform(0, 1)  # exponential backoff with jitter
+                logging.info(e)
+                logging.info(f'Encountered error, backing off and retrying in {backoff_time}s...')
                 await asyncio.sleep(backoff_time)
             else:
                 raise e
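
Reviewer note (editorial addition, not part of the patch): the fix works because a Python coroutine object can only be awaited once. The old code built the coroutine first and handed it to retry_with_backoff, so the first retry re-awaited the same object and failed with "RuntimeError: cannot reuse already awaited coroutine". Passing a zero-argument lambda instead lets retry_with_backoff construct a fresh coroutine on every attempt. Below is a minimal, self-contained sketch of the pattern; flaky_call and its failure count are hypothetical stand-ins for the endpoint calls, and the MAX_BACKOFF cap is a suggested addition not present in the patch (uncapped, with MAX_RETRIES = 15 the final backoff would be 2 ** 13 seconds, well over two hours).

import asyncio
import logging
import random

MAX_RETRIES = 15
INITIAL_BACKOFF = 1
MAX_BACKOFF = 60  # suggested cap; the patch itself leaves the backoff uncapped


async def retry_with_backoff(coro_fn):
    # coro_fn is a zero-argument callable returning a *fresh* coroutine;
    # a bare coroutine object could only be awaited on the first attempt.
    for attempt in range(MAX_RETRIES):
        try:
            return await coro_fn()
        except Exception as e:
            if attempt < MAX_RETRIES - 1:
                # exponential backoff with jitter, capped at MAX_BACKOFF
                backoff_time = min(INITIAL_BACKOFF * (2 ** attempt), MAX_BACKOFF) + random.uniform(0, 1)
                logging.info(e)
                logging.info(f'Encountered error, backing off and retrying in {backoff_time}s...')
                await asyncio.sleep(backoff_time)
            else:
                raise e


# Hypothetical flaky endpoint: fails twice, then succeeds.
_failures_left = {"n": 2}


async def flaky_call():
    if _failures_left["n"] > 0:
        _failures_left["n"] -= 1
        raise ConnectionError("transient failure")
    return "ok"


# Correct: the lambda builds a new coroutine for every attempt.
print(asyncio.run(retry_with_backoff(lambda: flaky_call())))

# Broken (the pre-patch pairing): with the old retry_with_backoff(coro)
# signature, passing flaky_call() directly meant the first retry re-awaited
# the same coroutine and raised "cannot reuse already awaited coroutine".

The same reasoning explains the endpoint_model.py hunk: text_generation is now wrapped in a lambda at the call site, and the coroutine returned by retry_with_backoff is what gets handed back for the caller to await.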