remove langchain (#279)

* supported o1 * update pyproject toml * fix o1 * fix o1 * remove langchain * fix bugs * support o1 * fix mypy and base url * aact update * fix bugs
sotopia-lab · Feb 1, 2025 · fd1b4d9 · fd1b4d9
1 parent ea14fda
commit fd1b4d9
Show file tree

Hide file tree

Showing 27 changed files with 371 additions and 914 deletions.
diff --git a/examples/benchmark_evaluator.py b/examples/benchmark_evaluator.py
@@ -123,7 +123,7 @@ def evaluate_evaluator(
         )
         run_async_server_in_batch_aevaluate(
             tag=tag,
-            model=model,  # type: ignore
+            model=model,
             batch_size=batch_size,
             push_to_db=push_to_db,
             verbose=verbose,
@@ -142,7 +142,7 @@ def evaluate_evaluator(
         while to_re_evaluate_list:
             run_async_server_in_batch_aevaluate(
                 tag=tag,
-                model=model,  # type: ignore
+                model=model,
                 batch_size=batch_size,
                 push_to_db=push_to_db,
                 verbose=verbose,

diff --git a/examples/evaluate_existing_episode.py b/examples/evaluate_existing_episode.py
@@ -1,7 +1,6 @@
 import asyncio
 import logging
 import subprocess
-import typing
 from datetime import datetime
 from logging import FileHandler
 
@@ -11,7 +10,6 @@
 from tqdm.asyncio import tqdm_asyncio
 
 from sotopia.database.logs import AnnotationForEpisode, EpisodeLog
-from sotopia.generation_utils.generate import LLM_Name
 from sotopia.server import aevaluate_one_episode
 
 # date and message only
@@ -40,7 +38,7 @@
 
 def run_async_server_in_batch_aevaluate(
     batch_size: int = 10,
-    model: LLM_Name = "gpt-4",
+    model: str = "gpt-4",
     reeval_list: list[str] = [],
     tag: str | None = None,
     push_to_db: bool = False,
@@ -100,7 +98,6 @@ def run_server(
 ) -> None:
     annotated_episodes_pks = [anno.episode for anno in AnnotationForEpisode.all()]
     annotated_episodes_pks = list(set(annotated_episodes_pks))
-    model = typing.cast(LLM_Name, model)
     # Call the function with the specified parameters
     run_async_server_in_batch_aevaluate(
         tag=tag,

diff --git a/examples/experiment_eval.py b/examples/experiment_eval.py
@@ -26,7 +26,6 @@
     SotopiaDimensions,
 )
 from sotopia.envs.parallel import ParallelSotopiaEnv
-from sotopia.generation_utils.generate import LLM_Name
 from sotopia.messages import AgentAction, Observation
 from sotopia.samplers import (
     BaseSampler,
@@ -73,7 +72,7 @@
 def check_existing_episodes(
     env_id: str,
     agent_ids: list[str],
-    models: dict[str, LLM_Name],
+    models: dict[str, str],
     tag: str | None = None,
 ) -> bool:
     if tag:
@@ -106,7 +105,7 @@ def _sample_env_agent_combo_and_push_to_db(env_id: str) -> None:
 
 @gin.configurable
 def _iterate_env_agent_combo_not_in_db(
-    model_names: dict[str, LLM_Name],
+    model_names: dict[str, str],
     env_ids: list[str] = [],
     tag: str | None = None,
 ) -> Generator[EnvAgentCombo[Observation, AgentAction], None, None]:
@@ -188,7 +187,7 @@ def _iterate_env_agent_combo_not_in_db(
 def run_async_server_in_batch(
     *,
     batch_size: int = 1,
-    model_names: dict[str, LLM_Name] = {
+    model_names: dict[str, str] = {
         "env": "gpt-4",
         "agent1": "gpt-4o-mini",
         "agent2": "gpt-4o-mini",

diff --git a/examples/experimental/group_discussion_agents/group_discussion_agents.py b/examples/experimental/group_discussion_agents/group_discussion_agents.py
@@ -4,8 +4,7 @@
 from sotopia.agents.llm_agent import ainput
 from sotopia.experimental.agents.base_agent import BaseAgent
 
-from sotopia.generation_utils import agenerate
-from sotopia.generation_utils.generate import StrOutputParser
+from sotopia.generation_utils import agenerate, StrOutputParser
 from sotopia.messages import ActionType
 
 from pydantic import Field

diff --git a/examples/experimental/interview_openhands/llm_agent.py b/examples/experimental/interview_openhands/llm_agent.py
@@ -12,8 +12,7 @@
 
 from sotopia.experimental.agents.base_agent import BaseAgent
 
-from sotopia.generation_utils import agenerate
-from sotopia.generation_utils.generate import StrOutputParser
+from sotopia.generation_utils import agenerate, StrOutputParser
 
 import json
 

diff --git a/examples/experimental/sotopia_original_replica/llm_agent_sotopia.py b/examples/experimental/sotopia_original_replica/llm_agent_sotopia.py
@@ -10,8 +10,7 @@
 from sotopia.database.persistent_profile import AgentProfile
 from typing import Any
 
-from sotopia.generation_utils import agenerate
-from sotopia.generation_utils.generate import StrOutputParser
+from sotopia.generation_utils import agenerate, StrOutputParser
 
 # Check Python version
 if sys.version_info >= (3, 11):

diff --git a/examples/fix_missing_episodes.py b/examples/fix_missing_episodes.py
@@ -25,7 +25,6 @@
     SotopiaDimensions,
 )
 from sotopia.envs.parallel import ParallelSotopiaEnv
-from sotopia.generation_utils.generate import LLM_Name
 from sotopia.messages.message_classes import AgentAction, Observation
 from sotopia.samplers.base_sampler import BaseSampler, EnvAgentCombo
 from sotopia.server import run_async_server
@@ -92,10 +91,8 @@ def find_combo_pk(
 def get_combo_model_map(
     all_episodes: List[EpisodeLog],
     all_combos_map: Dict[str, EnvAgentComboStorage],
-) -> Dict[str, Counter[tuple[LLM_Name, LLM_Name, LLM_Name]]]:
-    combo_model_map: Dict[str, Counter[tuple[LLM_Name, LLM_Name, LLM_Name]]] = (
-        defaultdict(Counter)
-    )
+) -> Dict[str, Counter[tuple[str, str, str]]]:
+    combo_model_map: Dict[str, Counter[tuple[str, str, str]]] = defaultdict(Counter)
     bad_combos = []
     valid_count = 0
     invalid_count = 0
@@ -132,9 +129,7 @@ def get_combo_model_map(
             all_combos_map,
         )
         if curr_combo_pk:
-            model_pair: tuple[LLM_Name, LLM_Name, LLM_Name] = cast(
-                tuple[LLM_Name, LLM_Name, LLM_Name], tuple(curr_ep.models)
-            )
+            model_pair: tuple[str, str, str] = tuple(curr_ep.models)  # type: ignore
             combo_model_map[curr_combo_pk][model_pair] += 1
             valid_count += 1
         else:
@@ -153,8 +148,8 @@ def get_combo_model_map(
 
 
 def get_all_model_pairs(
-    combo_model_map: Dict[str, Counter[tuple[LLM_Name, LLM_Name, LLM_Name]]],
-) -> Set[tuple[LLM_Name, LLM_Name, LLM_Name]]:
+    combo_model_map: Dict[str, Counter[tuple[str, str, str]]],
+) -> Set[tuple[str, str, str]]:
     all_model_pairs = set()
     for key in combo_model_map:
         for combo in combo_model_map[key]:
@@ -169,12 +164,12 @@ def get_all_model_pairs(
 
 
 def get_all_missing_model_pairs(
-    combo_model_map: Dict[str, Counter[tuple[LLM_Name, LLM_Name, LLM_Name]]],
-    all_model_pairs: Set[tuple[LLM_Name, LLM_Name, LLM_Name]],
+    combo_model_map: Dict[str, Counter[tuple[str, str, str]]],
+    all_model_pairs: Set[tuple[str, str, str]],
     num_required: int,
-) -> Dict[str, Counter[tuple[LLM_Name, LLM_Name, LLM_Name]]]:
-    combo_missing_model_map: Dict[str, Counter[tuple[LLM_Name, LLM_Name, LLM_Name]]] = (
-        defaultdict(Counter)
+) -> Dict[str, Counter[tuple[str, str, str]]]:
+    combo_missing_model_map: Dict[str, Counter[tuple[str, str, str]]] = defaultdict(
+        Counter
     )
     missing_count = 0
     for key in combo_model_map:
@@ -192,9 +187,9 @@ def get_all_missing_model_pairs(
 # temporally used for making sure unique (env, agents, models) setting; need to change
 # according to the Counter in the case needing to run multiple experiments for one setting
 def get_missing_model_combo_map(
-    combo_missing_model_map: Dict[str, Counter[tuple[LLM_Name, LLM_Name, LLM_Name]]],
+    combo_missing_model_map: Dict[str, Counter[tuple[str, str, str]]],
     all_combos_map: Dict[str, EnvAgentComboStorage],
-) -> Dict[tuple[LLM_Name, LLM_Name], List[tuple[str, str, str]]]:
+) -> Dict[tuple[str, str], List[tuple[str, str, str]]]:
     missing_model_combo_map = defaultdict(list)
     for combo_pk in combo_missing_model_map:
         model_counter = combo_missing_model_map[combo_pk]
@@ -216,7 +211,7 @@ def get_missing_model_combo_map(
 
 
 def yield_env_agent_combo(
-    combo_ids: list[tuple[str, str, str]], model_names: dict[str, LLM_Name]
+    combo_ids: list[tuple[str, str, str]], model_names: dict[str, str]
 ) -> Generator[EnvAgentCombo[Observation, AgentAction], None, None]:
     for combo_id in combo_ids:
         env_id, agent_id1, agent_id2 = combo_id
@@ -249,8 +244,8 @@ def yield_env_agent_combo(
 
 @gin.configurable
 def re_run_missing_episodes(
-    combo_with_models: dict[tuple[LLM_Name, LLM_Name], list[tuple[str, str, str]]],
-    model_names: dict[str, LLM_Name] = {
+    combo_with_models: dict[tuple[str, str], list[tuple[str, str, str]]],
+    model_names: dict[str, str] = {
         "env": "gpt-4",
         "agent1": "gpt-4o-mini",
         "agent2": "gpt-4o-mini",

diff --git a/examples/fix_missing_episodes_with_tag.py b/examples/fix_missing_episodes_with_tag.py
@@ -41,7 +41,6 @@
     SotopiaDimensions,
 )
 from sotopia.envs.parallel import ParallelSotopiaEnv
-from sotopia.generation_utils.generate import LLM_Name
 from sotopia.messages.message_classes import AgentAction, Observation
 from sotopia.samplers.base_sampler import BaseSampler, EnvAgentCombo
 from sotopia.server import arun_one_script, run_async_server
@@ -121,9 +120,9 @@ def find_combo_pk(
 def get_combo_model_map(
     all_episodes: List[Tuple[EpisodeLog, str]],
     all_combos_map: Dict[str, EnvAgentComboStorage],
-) -> Dict[str, Counter[tuple[LLM_Name, LLM_Name, LLM_Name, str]]]:
-    combo_model_map: Dict[str, Counter[tuple[LLM_Name, LLM_Name, LLM_Name, str]]] = (
-        defaultdict(Counter)
+) -> Dict[str, Counter[tuple[str, str, str, str]]]:
+    combo_model_map: Dict[str, Counter[tuple[str, str, str, str]]] = defaultdict(
+        Counter
     )
 
     bad_combos = []
@@ -195,9 +194,11 @@ def get_combo_model_map(
             combined = copy.deepcopy(curr_ep.models)
             combined.append(curr_tag)
 
-            model_pair: tuple[LLM_Name, LLM_Name, LLM_Name, str] = cast(
-                tuple[LLM_Name, LLM_Name, LLM_Name, str], tuple(combined)
-            )
+            # Add length check and explicit casting
+            if len(combined) != 4:
+                continue
+
+            model_pair = (combined[0], combined[1], combined[2], combined[3])
             combo_model_map[curr_combo_pk][model_pair] += 1
             valid_count += 1
         else:
@@ -226,8 +227,8 @@ def get_combo_model_map(
 
 
 def get_all_model_pairs(
-    combo_model_map: Dict[str, Counter[tuple[LLM_Name, LLM_Name, LLM_Name, str]]],
-) -> Set[tuple[LLM_Name, LLM_Name, LLM_Name, str]]:
+    combo_model_map: Dict[str, Counter[tuple[str, str, str, str]]],
+) -> Set[tuple[str, str, str, str]]:
     all_model_pairs = set()
     for key in combo_model_map:
         for combo in combo_model_map[key]:
@@ -242,19 +243,19 @@ def get_all_model_pairs(
 
 
 def get_all_missing_model_pairs(
-    combo_model_map: Dict[str, Counter[tuple[LLM_Name, LLM_Name, LLM_Name, str]]],
-    all_model_pairs: Set[tuple[LLM_Name, LLM_Name, LLM_Name, str]],
+    combo_model_map: Dict[str, Counter[tuple[str, str, str, str]]],
+    all_model_pairs: Set[tuple[str, str, str, str]],
     num_required: int,
     all_combos_map: Dict[str, EnvAgentComboStorage] = {},
     add_missing_env: bool = False,
-) -> Dict[str, Counter[tuple[LLM_Name, LLM_Name, LLM_Name, str]]]:
+) -> Dict[str, Counter[tuple[str, str, str, str]]]:
     """
     all_combos_map: if add_missing_env is True, then we need to provide all combos map
     add_missing_env: if True, add missing env to the map, else just match the model pairs among selected tags
     """
-    combo_missing_model_map: Dict[
-        str, Counter[tuple[LLM_Name, LLM_Name, LLM_Name, str]]
-    ] = defaultdict(Counter)
+    combo_missing_model_map: Dict[str, Counter[tuple[str, str, str, str]]] = (
+        defaultdict(Counter)
+    )
 
     if add_missing_env:
         for combo_key in all_combos_map:
@@ -282,11 +283,9 @@ def get_all_missing_model_pairs(
 # temporally used for making sure unique (env, agents, models) setting; need to change
 # according to the Counter in the case needing to run multiple experiments for one setting
 def get_missing_model_combo_map(
-    combo_missing_model_map: Dict[
-        str, Counter[tuple[LLM_Name, LLM_Name, LLM_Name, str]]
-    ],
+    combo_missing_model_map: Dict[str, Counter[tuple[str, str, str, str]]],
     all_combos_map: Dict[str, EnvAgentComboStorage],
-) -> Dict[tuple[LLM_Name, LLM_Name, LLM_Name, str], List[tuple[str, str, str]]]:
+) -> Dict[tuple[str, str, str, str], List[tuple[str, str, str]]]:
     missing_model_combo_map = defaultdict(list)
     for combo_pk in combo_missing_model_map:
         model_counter = combo_missing_model_map[combo_pk]
@@ -314,7 +313,7 @@ def get_missing_model_combo_map(
 
 
 def yield_env_agent_combo(
-    combo_ids: list[tuple[str, str, str]], model_names: dict[str, LLM_Name]
+    combo_ids: list[tuple[str, str, str]], model_names: dict[str, str]
 ) -> Generator[EnvAgentCombo[Observation, AgentAction], None, None]:
     for combo_id in combo_ids:
         env_id, agent_id1, agent_id2 = combo_id
@@ -348,7 +347,7 @@ def yield_env_agent_combo(
 @gin.configurable
 def re_run_missing_episodes(
     env_agent_ids: List[Tuple[str, str, str]] = [],
-    model_names: dict[str, LLM_Name] = {
+    model_names: dict[str, str] = {
         "env": "gpt-4",
         "agent1": "gpt-4o-mini",
         "agent2": "gpt-4o-mini",

diff --git a/examples/generate_scenarios.py b/examples/generate_scenarios.py
@@ -10,9 +10,6 @@
 
 from sotopia.database import EnvAgentComboStorage, EnvironmentProfile
 from sotopia.database.persistent_profile import RelationshipType
-from sotopia.generation_utils import (
-    LLM_Name,
-)
 from .generate_specific_envs import (  # type:ignore[import-untyped]
     generate_craigslist_bargains_envs,
     generate_mutual_friend_envs,
@@ -53,7 +50,7 @@ def check_existing_envs(
 
 def generate_newenv_profile(
     num: int,
-    gen_model: LLM_Name = "gpt-4-turbo-2024-04-09",
+    gen_model: str = "gpt-4-turbo-2024-04-09",
     temperature: float = 0.5,
     type: str = "craigslist_bargains",
 ) -> pd.DataFrame:
@@ -101,7 +98,6 @@ def auto_generate_scenarios(
     """
     Function to generate new environment scenarios based on target number of generation
     """
-    gen_model = cast(LLM_Name, gen_model)
     all_background_df = generate_newenv_profile(num, gen_model, temperature)
     columns = [
         "codename",

diff --git a/examples/generate_script.py b/examples/generate_script.py
@@ -11,8 +11,6 @@
 from rich.logging import RichHandler
 from tqdm import tqdm
 from tqdm.asyncio import tqdm_asyncio
-
-from sotopia.generation_utils.generate import LLM_Name
 from sotopia.messages.message_classes import AgentAction, Observation
 from sotopia.samplers import EnvAgentCombo
 from sotopia.server import arun_one_script, run_async_server
@@ -38,7 +36,7 @@
 
 @gin.configurable
 def single_step(
-    model_names: dict[str, LLM_Name],
+    model_names: dict[str, str],
     tag: str | None = None,
     batch_size: int = 5,
     push_to_db: bool = True,
@@ -104,7 +102,7 @@ def single_step(
 
 @gin.configurable
 def full_freeform(
-    model_names: dict[str, LLM_Name],
+    model_names: dict[str, str],
     tag: str | None = None,
     batch_size: int = 5,
     push_to_db: bool = True,
@@ -175,14 +173,14 @@ def full_freeform(
 def run_async_server_in_batch_script(
     *,
     batch_size: int = 10,
-    model: LLM_Name = "gpt-4o-mini",
+    model: str = "gpt-4o-mini",
     tag: str | None = None,
     push_to_db: bool = True,
     json_in_script: bool = False,
     generate_in_full: bool = False,
     verbose: bool = False,
 ) -> None:
-    model_names: dict[str, LLM_Name] = {
+    model_names: dict[str, str] = {
         "env": model,
         "agent1": model,
         "agent2": model,

diff --git a/examples/generate_specific_envs.py b/examples/generate_specific_envs.py
@@ -12,7 +12,7 @@
 import numpy as np
 from datasets import DatasetDict, load_dataset
 
-from sotopia.generation_utils.generate import StrOutputParser, agenerate
+from sotopia.generation_utils import agenerate, StrOutputParser
 
 
 async def generate_mutual_friend_envs() -> tuple[str, list[str]]: