From 2340a4fcb85a36123b1d3ed4494d8028e0817b03 Mon Sep 17 00:00:00 2001
From: Shaltiel Shmidman
Date: Sun, 25 Aug 2024 17:36:59 +0300
Subject: [PATCH] Fix retry with backoff

---
 src/lighteval/models/endpoint_model.py |  6 +++---
 src/lighteval/models/oai_model.py      |  2 +-
 src/lighteval/models/utils.py          | 11 +++++++----
 3 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/src/lighteval/models/endpoint_model.py b/src/lighteval/models/endpoint_model.py
index 9781ee478..a61836428 100644
--- a/src/lighteval/models/endpoint_model.py
+++ b/src/lighteval/models/endpoint_model.py
@@ -156,16 +156,16 @@ def _async_process_request(
     ) -> Coroutine[None, list[TextGenerationOutput], str]:
         # Todo: add an option to launch with conversational instead for chat prompts
         # https://huggingface.co/docs/huggingface_hub/v0.20.3/en/package_reference/inference_client#huggingface_hub.AsyncInferenceClient.conversational
-        generated_text = self.async_client.text_generation(
+        generated_text = retry_with_backoff(lambda: self.async_client.text_generation(
             prompt=context,
             details=True,
             decoder_input_details=True,
             max_new_tokens=max_tokens,
             stop_sequences=stop_tokens,
             # truncate=,
-        )
+        ))
 
-        return retry_with_backoff(generated_text)
+        return generated_text
 
     def _process_request(self, context: str, stop_tokens: list[str], max_tokens: int) -> TextGenerationOutput:
         # Todo: add an option to launch with conversational instead for chat prompts
diff --git a/src/lighteval/models/oai_model.py b/src/lighteval/models/oai_model.py
index 5885dc37b..fe46be11b 100644
--- a/src/lighteval/models/oai_model.py
+++ b/src/lighteval/models/oai_model.py
@@ -46,7 +46,7 @@ async def _async_process_request(
         self, context: str, stop_tokens: list[str], max_tokens: int
     ) -> Coroutine[None, TextGenerationOutput, str]:
         # Todo: add an option to launch with conversational instead for chat prompts
-        output = await retry_with_backoff(self.client.completions.create(
+        output = await retry_with_backoff(lambda: self.client.completions.create(
             model="/repository",
             prompt=context,
             max_tokens=max_tokens,
diff --git a/src/lighteval/models/utils.py b/src/lighteval/models/utils.py
index 6fa6ad601..76f8d840f 100644
--- a/src/lighteval/models/utils.py
+++ b/src/lighteval/models/utils.py
@@ -21,6 +21,7 @@
 # SOFTWARE.
 
 import asyncio
+import logging
 import os
 from itertools import islice
 from typing import Optional, Union
@@ -101,15 +102,17 @@ def batched(iterable, n):
         yield batch
 
 import random
-MAX_RETRIES = 5
+MAX_RETRIES = 15
 INITIAL_BACKOFF = 1
-async def retry_with_backoff(coro):
+async def retry_with_backoff(coro_fn):
     for attempt in range(MAX_RETRIES):
         try:
-            return await coro
+            return await coro_fn()
         except Exception as e:
             if attempt < MAX_RETRIES - 1:
-                backoff_time = INITIAL_BACKOFF * (2 ** attempt) + random.uniform(0, 1)
+                backoff_time = INITIAL_BACKOFF * (2 ** attempt) + random.uniform(0, 1)  # exponential backoff with jitter
+                logging.info(e)
+                logging.info(f'Encountered error, backing off and retrying in {backoff_time}s...')
                 await asyncio.sleep(backoff_time)
             else:
                 raise e
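
Reviewer note (editorial addition, not part of the patch): the fix works because a Python coroutine object can only be awaited once. The old code built the coroutine first and handed it to retry_with_backoff, so the first retry re-awaited the same object and failed with "RuntimeError: cannot reuse already awaited coroutine". Passing a zero-argument lambda instead lets retry_with_backoff construct a fresh coroutine on every attempt. Below is a minimal, self-contained sketch of the pattern; flaky_call and its failure count are hypothetical stand-ins for the endpoint calls, and the MAX_BACKOFF cap is a suggested addition not present in the patch (uncapped, with MAX_RETRIES = 15 the final backoff would be 2 ** 13 seconds, well over two hours).

import asyncio
import logging
import random

MAX_RETRIES = 15
INITIAL_BACKOFF = 1
MAX_BACKOFF = 60  # suggested cap; the patch itself leaves the backoff uncapped


async def retry_with_backoff(coro_fn):
    # coro_fn is a zero-argument callable returning a *fresh* coroutine;
    # a bare coroutine object could only be awaited on the first attempt.
    for attempt in range(MAX_RETRIES):
        try:
            return await coro_fn()
        except Exception as e:
            if attempt < MAX_RETRIES - 1:
                # exponential backoff with jitter, capped at MAX_BACKOFF
                backoff_time = min(INITIAL_BACKOFF * (2 ** attempt), MAX_BACKOFF) + random.uniform(0, 1)
                logging.info(e)
                logging.info(f'Encountered error, backing off and retrying in {backoff_time}s...')
                await asyncio.sleep(backoff_time)
            else:
                raise e


# Hypothetical flaky endpoint: fails twice, then succeeds.
_failures_left = {"n": 2}


async def flaky_call():
    if _failures_left["n"] > 0:
        _failures_left["n"] -= 1
        raise ConnectionError("transient failure")
    return "ok"


# Correct: the lambda builds a new coroutine for every attempt.
print(asyncio.run(retry_with_backoff(lambda: flaky_call())))

# Broken (the pre-patch pairing): with the old retry_with_backoff(coro)
# signature, passing flaky_call() directly meant the first retry re-awaited
# the same coroutine and raised "cannot reuse already awaited coroutine".

The same reasoning explains the endpoint_model.py hunk: text_generation is now wrapped in a lambda at the call site, and the coroutine returned by retry_with_backoff is what gets handed back for the caller to await.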