Skip to content

Commit

Permalink
remove langchain (#279)
Browse files Browse the repository at this point in the history
* supported o1

* update pyproject toml

* fix o1

* fix o1

* remove langchain

* fix bugs

* support o1

* fix mypy and base url

* aact update

* fix bugs
  • Loading branch information
XuhuiZhou authored Feb 1, 2025
1 parent ea14fda commit fd1b4d9
Show file tree
Hide file tree
Showing 27 changed files with 371 additions and 914 deletions.
4 changes: 2 additions & 2 deletions examples/benchmark_evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ def evaluate_evaluator(
)
run_async_server_in_batch_aevaluate(
tag=tag,
model=model, # type: ignore
model=model,
batch_size=batch_size,
push_to_db=push_to_db,
verbose=verbose,
Expand All @@ -142,7 +142,7 @@ def evaluate_evaluator(
while to_re_evaluate_list:
run_async_server_in_batch_aevaluate(
tag=tag,
model=model, # type: ignore
model=model,
batch_size=batch_size,
push_to_db=push_to_db,
verbose=verbose,
Expand Down
5 changes: 1 addition & 4 deletions examples/evaluate_existing_episode.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import asyncio
import logging
import subprocess
import typing
from datetime import datetime
from logging import FileHandler

Expand All @@ -11,7 +10,6 @@
from tqdm.asyncio import tqdm_asyncio

from sotopia.database.logs import AnnotationForEpisode, EpisodeLog
from sotopia.generation_utils.generate import LLM_Name
from sotopia.server import aevaluate_one_episode

# date and message only
Expand Down Expand Up @@ -40,7 +38,7 @@

def run_async_server_in_batch_aevaluate(
batch_size: int = 10,
model: LLM_Name = "gpt-4",
model: str = "gpt-4",
reeval_list: list[str] = [],
tag: str | None = None,
push_to_db: bool = False,
Expand Down Expand Up @@ -100,7 +98,6 @@ def run_server(
) -> None:
annotated_episodes_pks = [anno.episode for anno in AnnotationForEpisode.all()]
annotated_episodes_pks = list(set(annotated_episodes_pks))
model = typing.cast(LLM_Name, model)
# Call the function with the specified parameters
run_async_server_in_batch_aevaluate(
tag=tag,
Expand Down
7 changes: 3 additions & 4 deletions examples/experiment_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
SotopiaDimensions,
)
from sotopia.envs.parallel import ParallelSotopiaEnv
from sotopia.generation_utils.generate import LLM_Name
from sotopia.messages import AgentAction, Observation
from sotopia.samplers import (
BaseSampler,
Expand Down Expand Up @@ -73,7 +72,7 @@
def check_existing_episodes(
env_id: str,
agent_ids: list[str],
models: dict[str, LLM_Name],
models: dict[str, str],
tag: str | None = None,
) -> bool:
if tag:
Expand Down Expand Up @@ -106,7 +105,7 @@ def _sample_env_agent_combo_and_push_to_db(env_id: str) -> None:

@gin.configurable
def _iterate_env_agent_combo_not_in_db(
model_names: dict[str, LLM_Name],
model_names: dict[str, str],
env_ids: list[str] = [],
tag: str | None = None,
) -> Generator[EnvAgentCombo[Observation, AgentAction], None, None]:
Expand Down Expand Up @@ -188,7 +187,7 @@ def _iterate_env_agent_combo_not_in_db(
def run_async_server_in_batch(
*,
batch_size: int = 1,
model_names: dict[str, LLM_Name] = {
model_names: dict[str, str] = {
"env": "gpt-4",
"agent1": "gpt-4o-mini",
"agent2": "gpt-4o-mini",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@
from sotopia.agents.llm_agent import ainput
from sotopia.experimental.agents.base_agent import BaseAgent

from sotopia.generation_utils import agenerate
from sotopia.generation_utils.generate import StrOutputParser
from sotopia.generation_utils import agenerate, StrOutputParser
from sotopia.messages import ActionType

from pydantic import Field
Expand Down
3 changes: 1 addition & 2 deletions examples/experimental/interview_openhands/llm_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,7 @@

from sotopia.experimental.agents.base_agent import BaseAgent

from sotopia.generation_utils import agenerate
from sotopia.generation_utils.generate import StrOutputParser
from sotopia.generation_utils import agenerate, StrOutputParser

import json

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,7 @@
from sotopia.database.persistent_profile import AgentProfile
from typing import Any

from sotopia.generation_utils import agenerate
from sotopia.generation_utils.generate import StrOutputParser
from sotopia.generation_utils import agenerate, StrOutputParser

# Check Python version
if sys.version_info >= (3, 11):
Expand Down
35 changes: 15 additions & 20 deletions examples/fix_missing_episodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
SotopiaDimensions,
)
from sotopia.envs.parallel import ParallelSotopiaEnv
from sotopia.generation_utils.generate import LLM_Name
from sotopia.messages.message_classes import AgentAction, Observation
from sotopia.samplers.base_sampler import BaseSampler, EnvAgentCombo
from sotopia.server import run_async_server
Expand Down Expand Up @@ -92,10 +91,8 @@ def find_combo_pk(
def get_combo_model_map(
all_episodes: List[EpisodeLog],
all_combos_map: Dict[str, EnvAgentComboStorage],
) -> Dict[str, Counter[tuple[LLM_Name, LLM_Name, LLM_Name]]]:
combo_model_map: Dict[str, Counter[tuple[LLM_Name, LLM_Name, LLM_Name]]] = (
defaultdict(Counter)
)
) -> Dict[str, Counter[tuple[str, str, str]]]:
combo_model_map: Dict[str, Counter[tuple[str, str, str]]] = defaultdict(Counter)
bad_combos = []
valid_count = 0
invalid_count = 0
Expand Down Expand Up @@ -132,9 +129,7 @@ def get_combo_model_map(
all_combos_map,
)
if curr_combo_pk:
model_pair: tuple[LLM_Name, LLM_Name, LLM_Name] = cast(
tuple[LLM_Name, LLM_Name, LLM_Name], tuple(curr_ep.models)
)
model_pair: tuple[str, str, str] = tuple(curr_ep.models) # type: ignore
combo_model_map[curr_combo_pk][model_pair] += 1
valid_count += 1
else:
Expand All @@ -153,8 +148,8 @@ def get_combo_model_map(


def get_all_model_pairs(
combo_model_map: Dict[str, Counter[tuple[LLM_Name, LLM_Name, LLM_Name]]],
) -> Set[tuple[LLM_Name, LLM_Name, LLM_Name]]:
combo_model_map: Dict[str, Counter[tuple[str, str, str]]],
) -> Set[tuple[str, str, str]]:
all_model_pairs = set()
for key in combo_model_map:
for combo in combo_model_map[key]:
Expand All @@ -169,12 +164,12 @@ def get_all_model_pairs(


def get_all_missing_model_pairs(
combo_model_map: Dict[str, Counter[tuple[LLM_Name, LLM_Name, LLM_Name]]],
all_model_pairs: Set[tuple[LLM_Name, LLM_Name, LLM_Name]],
combo_model_map: Dict[str, Counter[tuple[str, str, str]]],
all_model_pairs: Set[tuple[str, str, str]],
num_required: int,
) -> Dict[str, Counter[tuple[LLM_Name, LLM_Name, LLM_Name]]]:
combo_missing_model_map: Dict[str, Counter[tuple[LLM_Name, LLM_Name, LLM_Name]]] = (
defaultdict(Counter)
) -> Dict[str, Counter[tuple[str, str, str]]]:
combo_missing_model_map: Dict[str, Counter[tuple[str, str, str]]] = defaultdict(
Counter
)
missing_count = 0
for key in combo_model_map:
Expand All @@ -192,9 +187,9 @@ def get_all_missing_model_pairs(
# temporarily used to make sure each (env, agents, models) setting is unique; needs to change
# according to the Counter in the case where multiple experiments must be run for one setting
def get_missing_model_combo_map(
combo_missing_model_map: Dict[str, Counter[tuple[LLM_Name, LLM_Name, LLM_Name]]],
combo_missing_model_map: Dict[str, Counter[tuple[str, str, str]]],
all_combos_map: Dict[str, EnvAgentComboStorage],
) -> Dict[tuple[LLM_Name, LLM_Name], List[tuple[str, str, str]]]:
) -> Dict[tuple[str, str], List[tuple[str, str, str]]]:
missing_model_combo_map = defaultdict(list)
for combo_pk in combo_missing_model_map:
model_counter = combo_missing_model_map[combo_pk]
Expand All @@ -216,7 +211,7 @@ def get_missing_model_combo_map(


def yield_env_agent_combo(
combo_ids: list[tuple[str, str, str]], model_names: dict[str, LLM_Name]
combo_ids: list[tuple[str, str, str]], model_names: dict[str, str]
) -> Generator[EnvAgentCombo[Observation, AgentAction], None, None]:
for combo_id in combo_ids:
env_id, agent_id1, agent_id2 = combo_id
Expand Down Expand Up @@ -249,8 +244,8 @@ def yield_env_agent_combo(

@gin.configurable
def re_run_missing_episodes(
combo_with_models: dict[tuple[LLM_Name, LLM_Name], list[tuple[str, str, str]]],
model_names: dict[str, LLM_Name] = {
combo_with_models: dict[tuple[str, str], list[tuple[str, str, str]]],
model_names: dict[str, str] = {
"env": "gpt-4",
"agent1": "gpt-4o-mini",
"agent2": "gpt-4o-mini",
Expand Down
41 changes: 20 additions & 21 deletions examples/fix_missing_episodes_with_tag.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@
SotopiaDimensions,
)
from sotopia.envs.parallel import ParallelSotopiaEnv
from sotopia.generation_utils.generate import LLM_Name
from sotopia.messages.message_classes import AgentAction, Observation
from sotopia.samplers.base_sampler import BaseSampler, EnvAgentCombo
from sotopia.server import arun_one_script, run_async_server
Expand Down Expand Up @@ -121,9 +120,9 @@ def find_combo_pk(
def get_combo_model_map(
all_episodes: List[Tuple[EpisodeLog, str]],
all_combos_map: Dict[str, EnvAgentComboStorage],
) -> Dict[str, Counter[tuple[LLM_Name, LLM_Name, LLM_Name, str]]]:
combo_model_map: Dict[str, Counter[tuple[LLM_Name, LLM_Name, LLM_Name, str]]] = (
defaultdict(Counter)
) -> Dict[str, Counter[tuple[str, str, str, str]]]:
combo_model_map: Dict[str, Counter[tuple[str, str, str, str]]] = defaultdict(
Counter
)

bad_combos = []
Expand Down Expand Up @@ -195,9 +194,11 @@ def get_combo_model_map(
combined = copy.deepcopy(curr_ep.models)
combined.append(curr_tag)

model_pair: tuple[LLM_Name, LLM_Name, LLM_Name, str] = cast(
tuple[LLM_Name, LLM_Name, LLM_Name, str], tuple(combined)
)
# Add length check and explicit casting
if len(combined) != 4:
continue

model_pair = (combined[0], combined[1], combined[2], combined[3])
combo_model_map[curr_combo_pk][model_pair] += 1
valid_count += 1
else:
Expand Down Expand Up @@ -226,8 +227,8 @@ def get_combo_model_map(


def get_all_model_pairs(
combo_model_map: Dict[str, Counter[tuple[LLM_Name, LLM_Name, LLM_Name, str]]],
) -> Set[tuple[LLM_Name, LLM_Name, LLM_Name, str]]:
combo_model_map: Dict[str, Counter[tuple[str, str, str, str]]],
) -> Set[tuple[str, str, str, str]]:
all_model_pairs = set()
for key in combo_model_map:
for combo in combo_model_map[key]:
Expand All @@ -242,19 +243,19 @@ def get_all_model_pairs(


def get_all_missing_model_pairs(
combo_model_map: Dict[str, Counter[tuple[LLM_Name, LLM_Name, LLM_Name, str]]],
all_model_pairs: Set[tuple[LLM_Name, LLM_Name, LLM_Name, str]],
combo_model_map: Dict[str, Counter[tuple[str, str, str, str]]],
all_model_pairs: Set[tuple[str, str, str, str]],
num_required: int,
all_combos_map: Dict[str, EnvAgentComboStorage] = {},
add_missing_env: bool = False,
) -> Dict[str, Counter[tuple[LLM_Name, LLM_Name, LLM_Name, str]]]:
) -> Dict[str, Counter[tuple[str, str, str, str]]]:
"""
all_combos_map: if add_missing_env is True, then we need to provide all combos map
add_missing_env: if True, add missing env to the map, else just match the model pairs among selected tags
"""
combo_missing_model_map: Dict[
str, Counter[tuple[LLM_Name, LLM_Name, LLM_Name, str]]
] = defaultdict(Counter)
combo_missing_model_map: Dict[str, Counter[tuple[str, str, str, str]]] = (
defaultdict(Counter)
)

if add_missing_env:
for combo_key in all_combos_map:
Expand Down Expand Up @@ -282,11 +283,9 @@ def get_all_missing_model_pairs(
# temporarily used to make sure each (env, agents, models) setting is unique; needs to change
# according to the Counter in the case where multiple experiments must be run for one setting
def get_missing_model_combo_map(
combo_missing_model_map: Dict[
str, Counter[tuple[LLM_Name, LLM_Name, LLM_Name, str]]
],
combo_missing_model_map: Dict[str, Counter[tuple[str, str, str, str]]],
all_combos_map: Dict[str, EnvAgentComboStorage],
) -> Dict[tuple[LLM_Name, LLM_Name, LLM_Name, str], List[tuple[str, str, str]]]:
) -> Dict[tuple[str, str, str, str], List[tuple[str, str, str]]]:
missing_model_combo_map = defaultdict(list)
for combo_pk in combo_missing_model_map:
model_counter = combo_missing_model_map[combo_pk]
Expand Down Expand Up @@ -314,7 +313,7 @@ def get_missing_model_combo_map(


def yield_env_agent_combo(
combo_ids: list[tuple[str, str, str]], model_names: dict[str, LLM_Name]
combo_ids: list[tuple[str, str, str]], model_names: dict[str, str]
) -> Generator[EnvAgentCombo[Observation, AgentAction], None, None]:
for combo_id in combo_ids:
env_id, agent_id1, agent_id2 = combo_id
Expand Down Expand Up @@ -348,7 +347,7 @@ def yield_env_agent_combo(
@gin.configurable
def re_run_missing_episodes(
env_agent_ids: List[Tuple[str, str, str]] = [],
model_names: dict[str, LLM_Name] = {
model_names: dict[str, str] = {
"env": "gpt-4",
"agent1": "gpt-4o-mini",
"agent2": "gpt-4o-mini",
Expand Down
6 changes: 1 addition & 5 deletions examples/generate_scenarios.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,6 @@

from sotopia.database import EnvAgentComboStorage, EnvironmentProfile
from sotopia.database.persistent_profile import RelationshipType
from sotopia.generation_utils import (
LLM_Name,
)
from .generate_specific_envs import ( # type:ignore[import-untyped]
generate_craigslist_bargains_envs,
generate_mutual_friend_envs,
Expand Down Expand Up @@ -53,7 +50,7 @@ def check_existing_envs(

def generate_newenv_profile(
num: int,
gen_model: LLM_Name = "gpt-4-turbo-2024-04-09",
gen_model: str = "gpt-4-turbo-2024-04-09",
temperature: float = 0.5,
type: str = "craigslist_bargains",
) -> pd.DataFrame:
Expand Down Expand Up @@ -101,7 +98,6 @@ def auto_generate_scenarios(
"""
Function to generate new environment scenarios based on target number of generation
"""
gen_model = cast(LLM_Name, gen_model)
all_background_df = generate_newenv_profile(num, gen_model, temperature)
columns = [
"codename",
Expand Down
10 changes: 4 additions & 6 deletions examples/generate_script.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,6 @@
from rich.logging import RichHandler
from tqdm import tqdm
from tqdm.asyncio import tqdm_asyncio

from sotopia.generation_utils.generate import LLM_Name
from sotopia.messages.message_classes import AgentAction, Observation
from sotopia.samplers import EnvAgentCombo
from sotopia.server import arun_one_script, run_async_server
Expand All @@ -38,7 +36,7 @@

@gin.configurable
def single_step(
model_names: dict[str, LLM_Name],
model_names: dict[str, str],
tag: str | None = None,
batch_size: int = 5,
push_to_db: bool = True,
Expand Down Expand Up @@ -104,7 +102,7 @@ def single_step(

@gin.configurable
def full_freeform(
model_names: dict[str, LLM_Name],
model_names: dict[str, str],
tag: str | None = None,
batch_size: int = 5,
push_to_db: bool = True,
Expand Down Expand Up @@ -175,14 +173,14 @@ def full_freeform(
def run_async_server_in_batch_script(
*,
batch_size: int = 10,
model: LLM_Name = "gpt-4o-mini",
model: str = "gpt-4o-mini",
tag: str | None = None,
push_to_db: bool = True,
json_in_script: bool = False,
generate_in_full: bool = False,
verbose: bool = False,
) -> None:
model_names: dict[str, LLM_Name] = {
model_names: dict[str, str] = {
"env": model,
"agent1": model,
"agent2": model,
Expand Down
2 changes: 1 addition & 1 deletion examples/generate_specific_envs.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import numpy as np
from datasets import DatasetDict, load_dataset

from sotopia.generation_utils.generate import StrOutputParser, agenerate
from sotopia.generation_utils import agenerate, StrOutputParser


async def generate_mutual_friend_envs() -> tuple[str, list[str]]:
Expand Down
Loading

0 comments on commit fd1b4d9

Please sign in to comment.