Skip to content

Commit

Permalink
add ci test with docker
Browse files Browse the repository at this point in the history
  • Loading branch information
ProKil committed Oct 10, 2024
1 parent 65c19a1 commit 7cee92d
Show file tree
Hide file tree
Showing 14 changed files with 191 additions and 157 deletions.
3 changes: 1 addition & 2 deletions .devcontainer/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
FROM mcr.microsoft.com/devcontainers/python:1-3.12-bullseye

RUN curl -fsSL https://ollama.com/install.sh | sh\
&& pip install uv\
RUN pip install uv\
&& uv venv /workspaces/.venv\
&& export UV_PROJECT_ENVIRONMENT=/workspaces/.venv\
&& echo export UV_PROJECT_ENVIRONMENT=/workspaces/.venv >> /root/.bashrc
9 changes: 9 additions & 0 deletions .devcontainer/Dockerfile.llamacpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
FROM ghcr.io/ggerganov/llama.cpp:server

# Install curl, download the model weights, then clean the apt cache to keep
# the image small.  `--fail` makes curl exit non-zero on an HTTP error so a
# failed download (e.g. a 404 HTML page from Hugging Face) aborts the build
# instead of silently writing a corrupt .gguf file into the image.
RUN apt-get update && \
    apt-get install -y curl && \
    rm -rf /var/lib/apt/lists/* && \
    curl --fail -L https://huggingface.co/unsloth/Llama-3.2-1B-Instruct-GGUF/resolve/main/Llama-3.2-1B-Instruct-Q8_0.gguf --output Llama-3.2.gguf

# The base image's ENTRYPOINT is the llama.cpp server binary; CMD supplies its
# arguments: serve the downloaded model on port 8000.
CMD ["-m", "Llama-3.2.gguf", "--port", "8000"]
47 changes: 0 additions & 47 deletions .devcontainer/Dockerfile.ollama

This file was deleted.

21 changes: 7 additions & 14 deletions .devcontainer/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,31 +2,24 @@ services:
devcontainer:
image: "ghcr.io/sotopia-lab/sotopia:latest"
depends_on:
ollama:
condition: service_healthy
llamacpp:
condition: service_started
redis:
condition: service_healthy
volumes:
- ../..:/workspaces:cached
command: sleep infinity
network_mode: "host"

ollama:
image: "ghcr.io/sotopia-lab/ollama:latest"
llamacpp:
image: "ghcr.io/sotopia-lab/llamacpp:latest"
ports:
- 11434:11434
container_name: ollama
pull_policy: always
restart: always
- 8000:8000
container_name: llamacpp
network_mode: "host"
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:11434/api/version"]
interval: 10s
timeout: 5s
retries: 5

redis:
image: redis/redis-stack-server:7.2.0-v6
image: redis/redis-stack-server:latest
ports:
- 6379:6379
healthcheck:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: Pytest
name: Pytest (Installation)

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
Expand Down Expand Up @@ -40,12 +40,8 @@ jobs:
python -m pip install uv
uv sync --extra test --extra chat
- name: Test with pytest
env: # Or as an environment variable
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
REDIS_OM_URL: ${{ secrets.REDIS_OM_URL }}
TOGETHER_API_KEY: ${{ secrets.TOGETHER_API_KEY }}
run: |
uv run pytest --cov=. --cov-report=xml
uv run pytest tests/cli --cov=. --cov-report=xml
- name: Upload coverage report to Codecov
uses: codecov/[email protected]
with:
Expand Down
41 changes: 41 additions & 0 deletions .github/workflows/tests_in_docker.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
name: Pytest in docker

# Cancel superseded in-progress runs on the same ref, except on main.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}

on:
  push:
    branches:
      - main
      - release
      - dev
  pull_request:
    branches:
      - main
      - release

jobs:
  Pytest:
    strategy:
      max-parallel: 5

    runs-on: ubuntu-latest

    steps:
      - name: Checkout
        uses: actions/checkout@v4
      # `runner.os` is one of "Linux" / "Windows" / "macOS" -- never a runner
      # label like "ubuntu-latest" -- so the original condition
      # `runner.os == 'ubuntu-latest'` was always false and this step never ran.
      - name: Set up Docker
        if: runner.os == 'Linux'
        uses: docker-practice/actions-setup-docker@master
        timeout-minutes: 12
      # Use the `docker compose` v2 subcommand consistently; the standalone
      # `docker-compose` v1 binary is deprecated and not guaranteed on runners.
      - name: Docker compose up
        run: |
          docker compose -f .devcontainer/docker-compose.yml up -d
      - name: Test with pytest
        run: |
          docker compose -f .devcontainer/docker-compose.yml run --rm -u root -v ./:/workspaces/sotopia devcontainer /bin/sh -c "export UV_PROJECT_ENVIRONMENT=/workspaces/.venv; cd /workspaces/sotopia; uv run --extra test --extra chat pytest --ignore tests/cli --cov=app --cov-report=xml"
      - name: Upload coverage report to Codecov
        uses: codecov/[email protected]
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
4 changes: 3 additions & 1 deletion sotopia/database/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from typing import TypeVar
from redis_om import JsonModel
from redis_om import JsonModel, Migrator
from .annotators import Annotator
from .env_agent_combo_storage import EnvAgentComboStorage
from .logs import AnnotationForEpisode, EpisodeLog
Expand Down Expand Up @@ -71,3 +71,5 @@ def _json_model_all(cls: type[InheritedJsonModel]) -> list[InheritedJsonModel]:


JsonModel.all = classmethod(_json_model_all) # type: ignore[assignment,method-assign]

Migrator().run()
2 changes: 2 additions & 0 deletions sotopia/envs/evaluators.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,7 @@ async def __acall__(
pydantic_object=self.response_format_class
),
temperature=temperature,
structured_output=self.model_name.startswith("custom"),
)
response_list = []
# TODO: multiple agents
Expand Down Expand Up @@ -345,6 +346,7 @@ async def __acall__(
)
return response_list
except Exception as e:
print(e)
log.debug(f"[red] Failed to generate environment response. {e}")
return []

Expand Down
16 changes: 12 additions & 4 deletions sotopia/generation_utils/generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -486,9 +486,9 @@ async def agenerate(
input_values["format_instructions"] = output_parser.get_format_instructions()

if structured_output:
assert (
model_name == "gpt-4o-2024-08-06"
), "Structured output is only supported in gpt-4o-2024-08-06"
assert model_name == "gpt-4o-2024-08-06" or model_name.startswith(
"custom"
), "Structured output is only supported in gpt-4o-2024-08-06 or custom models"
human_message_prompt = HumanMessagePromptTemplate(
prompt=PromptTemplate(
template=template,
Expand All @@ -501,7 +501,15 @@ async def agenerate(
instantiated_prompt = prompt_result.messages[0].content
assert isinstance(output_parser, PydanticOutputParser)
assert isinstance(instantiated_prompt, str)
client = OpenAI()
if model_name.startswith("custom"):
client = OpenAI(
base_url=model_name.split("@")[1],
api_key=os.environ.get("CUSTOM_API_KEY") or "EMPTY",
)
model_name = model_name.split("@")[0].split("/")[1]
else:
client = OpenAI()

completion = client.beta.chat.completions.parse(
model=model_name,
messages=[
Expand Down
39 changes: 39 additions & 0 deletions test-structured_output.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
"""Manual smoke test for structured output against a local OpenAI-compatible server.

Sends one chat-completion request with a Pydantic ``response_format`` to the
endpoint at ``http://localhost:8000/v1`` (e.g. the llama.cpp server started by
.devcontainer/docker-compose.yml) and prints the parsed result.

This is a hand-run script, not part of the automated pytest suite; it requires
a server already listening on port 8000.
"""

from pydantic import BaseModel
from openai import OpenAI


class CalendarEvent(BaseModel):
    """Target schema the model is asked to produce as structured output."""

    name: str
    date: str
    participants: list[str]


def main() -> None:
    """Run a single structured-output request and print the parsed message."""
    client = OpenAI(
        base_url="http://localhost:8000/v1",
        api_key="ollama",  # required by the client, but unused by the server
    )

    response = client.beta.chat.completions.parse(
        model="llama3.2:1b",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "Who won the world series in 2020?"},
            {"role": "assistant", "content": "The LA Dodgers won in 2020."},
            {"role": "user", "content": "Where was it played?"},
        ],
        response_format=CalendarEvent,
    )
    print(response.choices[0].message.parsed)


if __name__ == "__main__":
    # Guarded so importing this module performs no network I/O; removed the
    # large block of commented-out dead code from the original script.
    main()
31 changes: 28 additions & 3 deletions tests/database/test_serialization.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from typing import Generator
import pytest
from sotopia.database import (
AgentProfile,
RelationshipProfile,
Expand All @@ -24,11 +26,34 @@
import csv


def test_episode_log_serialization() -> None:
@pytest.fixture
def _test_create_episode_log_setup_and_tear_down() -> Generator[None, None, None]:
    """Create two temporary agent profiles and one environment profile in Redis,
    yield to the test, then delete everything the test may have created
    (including the episode log written under ``tmppk_episode_log``).
    """
    temp_agents = (("John", "tmppk_agent1"), ("Jane", "tmppk_agent2"))
    for given_name, agent_pk in temp_agents:
        AgentProfile(first_name=given_name, last_name="Doe", pk=agent_pk).save()

    environment = EnvironmentProfile(
        pk="tmppk_environment",
        codename="borrow_money",
        source="hand-craft",
        scenario="Conversation between two friends at a tea party",
        agent_goals=[
            "Borrow money (<extra_info>Extra information: you need $3000 to support life.</extra_info>)",
            "Maintain financial stability while maintaining friendship (<extra_info>Extra information: you only have $2000 available right now. <clarification_hint>Hint: you can not lend all $2000 since you still need to maintain your financial stability.</clarification_hint></extra_info>)",
        ],
    )
    environment.save()

    yield

    # Teardown: remove every record keyed by a tmppk_* primary key.
    for _, agent_pk in temp_agents:
        AgentProfile.delete(agent_pk)
    EnvironmentProfile.delete("tmppk_environment")
    EpisodeLog.delete("tmppk_episode_log")


def test_episode_log_serialization(
_test_create_episode_log_setup_and_tear_down: None,
) -> None:
episode_log = EpisodeLog(
pk="01H9FG15A2NDTNH8K6F2T5MZN3",
environment="01H7VFHNN7XTR99319DS8KZCQM",
agents=["01H5TNE5PAZABGW79HJ07TACCZ", "01H5TNE5P83CZ1TDBVN74NGEEJ"],
environment="tmppk_environment",
agents=["tmppk_agent1", "tmppk_agent2"],
tag="togethercomputer/mpt-30b-chat_togethercomputer/llama-2-70b-chat_v0.0.1_clean",
models=[
"gpt-4",
Expand Down
Loading

0 comments on commit 7cee92d

Please sign in to comment.