Release/v0.1.0 #3

Merged 2 commits on Sep 25, 2024.
README.md: 11 changes (10 additions & 1 deletion)

```diff
@@ -2,13 +2,22 @@
 This is a simple fastapi based server mock that implements the OpenAI API.
 
 Available endpoints:
 
 - /v1/chat/completion
 
 Instead of running a LLM model to generate completions, it simply returns a response generated by surrogate models. Available surrogate models are:
-- "yes_no": returns random "yes" or "no" response
+
+- "yes_no": returns random "Yes" or "No" response
+- "ja_nein": returns random "Ja" or "Nein" response
+- "lorem_ipsum": returns random "lorem ipsum" text
+
+## Run via docker:
+```bash
+docker pull ghcr.io/hummerichsander/openai_api_server_mock:v ... # replace ... with the latest version
+```
+
 Environment variables:
 
 - `CONTEXT_SIZE`: context size for the model (default: 4096)
 - `SLEEP_TIME`: sleep time in seconds before returning the response (default: 0)
 - `MAX_CONCURRENT_REQUESTS`: maximum number of concurrent requests (default: 10^9)
```
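For orientation, here is a minimal sketch of how the mock could be exercised once the container is running. The host/port mapping and the dummy API key are assumptions (the README above does not pin them down); the `ja_nein` model name is the surrogate introduced in this PR.

```python
# Minimal sketch, assuming the container is reachable on localhost:8000
# and that the mock does not validate API keys (both are assumptions).
from openai import OpenAI

client = OpenAI(
    base_url="http://localhost:8000/v1",  # assumed local port mapping
    api_key="dummy",                      # placeholder value, not a real key
)

chat_completion = client.chat.completions.create(
    model="ja_nein",  # surrogate model added in this PR
    messages=[{"role": "user", "content": "Ist der Himmel blau?"}],
)
print(chat_completion.choices[0].message.content)  # "Ja" or "Nein"
```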
example_env: 3 changes (1 addition & 2 deletions)

```diff
@@ -1,5 +1,4 @@
-MODEL_CONTEXT_SIZE=4096
-SURROGATE="yes_no"
+CONTEXT_SIZE=4096
 SLEEP_TIME=1
 MAX_CONCURRENT_REQUESTS=1
 LANGUAGE="en"
```
openai_api_server_mock/chat/surrogates.py: 16 changes (12 additions & 4 deletions)

```diff
@@ -89,15 +89,23 @@ class YesNoSurrogate(ModelSurrogate):
 
     @classmethod
     async def generate(cls, n: int, messages: List[Message]) -> List[str]:
-        if settings.language == "en":
-            return ["Yes" if random.random() > 0.5 else "No"]
-        elif settings.language == "de":
-            return ["Ja" if random.random() > 0.5 else "Nein"]
+        return ["Yes" if random.random() > 0.5 else "No"]
 
 
 YesNoSurrogate.register()
 
 
+class JaNeinSurrogate(ModelSurrogate):
+    name: str = "ja_nein"
+
+    @classmethod
+    async def generate(cls, n: int, messages: List[Message]) -> List[str]:
+        return ["Ja" if random.random() > 0.5 else "Nein"]
+
+
+JaNeinSurrogate.register()
+
+
 async def get_surrogate(model: str) -> ModelSurrogate:
     global available_surrogates
     for surrogate in available_surrogates:
```
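The diff makes the registration pattern visible: a `ModelSurrogate` subclass declares a `name` and an async `generate`, then calls `register()` so `get_surrogate` can find it by model name. As a purely hypothetical illustration (the class name, answer strings, and the import are assumptions, not part of this PR), a further surrogate would follow the same shape:

```python
import random
from typing import List

# Assumed import path, based on the file shown in this diff.
from openai_api_server_mock.chat.surrogates import Message, ModelSurrogate


class OuiNonSurrogate(ModelSurrogate):  # hypothetical example, not in this PR
    name: str = "oui_non"

    @classmethod
    async def generate(cls, n: int, messages: List[Message]) -> List[str]:
        # Coin flip between two fixed answers, mirroring yes_no/ja_nein.
        return ["Oui" if random.random() > 0.5 else "Non"]


OuiNonSurrogate.register()
```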
openai_api_server_mock/settings.py: 1 change (0 additions & 1 deletion)

```diff
@@ -5,7 +5,6 @@
 
 class Settings(BaseSettings):
     context_size: int = Field(alias="CONTEXT_SIZE", default=4096)
-    surrogate: str = Field(alias="SURROGATE", default="lorem_ipsum")
     sleep_time: int = Field(alias="SLEEP_TIME", default=0)
     max_concurrent_requests: int = Field(
         alias="MAX_CONCURRENT_REQUESTS", default=10**9
```
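With the `surrogate` field gone, the surrogate is now chosen per request via the `model` parameter rather than globally at startup. The remaining fields resolve from the environment through their aliases; here is a minimal sketch of that mechanism, assuming pydantic v2 with pydantic-settings (which this PR does not confirm):

```python
import os

from pydantic import Field
from pydantic_settings import BaseSettings  # assumption: pydantic v2 stack


class Settings(BaseSettings):
    # The field aliases double as the environment variable names.
    context_size: int = Field(alias="CONTEXT_SIZE", default=4096)
    sleep_time: int = Field(alias="SLEEP_TIME", default=0)


os.environ["CONTEXT_SIZE"] = "2048"
print(Settings().context_size)  # 2048, read from the CONTEXT_SIZE env var
print(Settings().sleep_time)    # 0, the declared default
```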
sandbox.ipynb: 10 changes (5 additions & 5 deletions)

```diff
@@ -20,12 +20,12 @@
 },
 {
  "cell_type": "code",
- "execution_count": 3,
+ "execution_count": 5,
  "metadata": {},
  "outputs": [],
  "source": [
   "chat_completion = client.chat.completions.create(\n",
-  " model=\"yes_no\",\n",
+  " model=\"ja_nein\",\n",
   " messages=[\n",
   " {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n",
   " {\"role\": \"user\", \"content\": \"Is the sky blue?\"}\n",
@@ -38,16 +38,16 @@
 },
 {
  "cell_type": "code",
- "execution_count": 4,
+ "execution_count": 6,
  "metadata": {},
  "outputs": [
   {
    "data": {
     "text/plain": [
-     "[Choice(finish_reason='stop', index=0, logprobs=ChoiceLogprobs(content=[ChatCompletionTokenLogprob(token='No', bytes=None, logprob=-0.4558056105339685, top_logprobs=[TopLogprob(token='Yes', bytes=None, logprob=-2.1267604392490442), TopLogprob(token='No', bytes=None, logprob=-0.7188313398698458), TopLogprob(token='Yes', bytes=None, logprob=-3.7428107344910946)])], refusal=None), message=ChatCompletionMessage(content='Yes', refusal=None, role='assistant', function_call=None, tool_calls=None, name=None))]"
+     "[Choice(finish_reason='length', index=0, logprobs=ChoiceLogprobs(content=[ChatCompletionTokenLogprob(token='Nein', bytes=None, logprob=-0.05135242454878156, top_logprobs=[TopLogprob(token='Ja', bytes=None, logprob=-0.9180391264546016), TopLogprob(token='Nein', bytes=None, logprob=-0.11234122861118023), TopLogprob(token='Ja', bytes=None, logprob=-2.7463193707941906)])], refusal=None), message=ChatCompletionMessage(content='Ja', refusal=None, role='assistant', function_call=None, tool_calls=None, name=None))]"
    ]
   },
-  "execution_count": 4,
+  "execution_count": 6,
   "metadata": {},
   "output_type": "execute_result"
  }
```
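Note what the notebook output shows: the mock also fabricates `finish_reason` and per-token logprobs. A short sketch of reading them from the response object above (attribute names follow the openai Python client types visible in the output; all values are randomly generated by the mock):

```python
# chat_completion is the response created in the notebook cell above.
choice = chat_completion.choices[0]
print(choice.finish_reason)    # e.g. 'length' (mock-generated)
print(choice.message.content)  # "Ja" or "Nein"

# Walk the fake logprobs the mock attaches to each token.
for tok in choice.logprobs.content:
    print(tok.token, tok.logprob)
    for alt in tok.top_logprobs:
        print("  top:", alt.token, alt.logprob)
```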