diff --git a/docs/pages/benchmark.md b/docs/pages/benchmark.md index ef45a802a..35af62ca9 100644 --- a/docs/pages/benchmark.md +++ b/docs/pages/benchmark.md @@ -1,6 +1,11 @@ # Benchmark your model as a social agent in Sotopia +``` +sotopia_benchmark --model= +``` +or + ``` python sotopia/benchmark/cli.py --model= ``` -Currently this script would run over 100 simulations on the Sotopia Hard tasks. And the partner model is fixed to be `together_ai/meta-llama/Llama-2-70b-chat-hf` +Currently, this script runs over 100 simulations on the Sotopia Hard tasks, and the partner model is fixed to `meta-llama/Llama-3-70b-chat-hf`. diff --git a/docs/pages/examples.mdx b/docs/pages/examples.mdx index 950158346..aac169c80 100644 --- a/docs/pages/examples.mdx +++ b/docs/pages/examples.mdx @@ -19,22 +19,3 @@ python examples/benchmark_evaluator.py --push-to-db --model=