Skip to content

Commit

Permalink
testing
Browse files Browse the repository at this point in the history
  • Loading branch information
emmalin-7 committed Jan 28, 2025
1 parent 0d13085 commit 4e86d31
Showing 1 changed file with 19 additions and 19 deletions.
38 changes: 19 additions & 19 deletions notebooks/agent_optimizer.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -78,7 +78,7 @@
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 2,
"metadata": {},
"outputs": [
{
Expand All @@ -88,7 +88,7 @@
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[20], line 5\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01magential\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtraining\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01magent_optimizer\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01magent\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m PromptOptimizer\n\u001b[1;32m 3\u001b[0m question \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mWho was once considered the best kick boxer in the world, however he has been involved in a number of controversies relating to his \u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124munsportsmanlike conducts\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m in the sport and crimes of violence outside of the ring\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m----> 5\u001b[0m agent \u001b[38;5;241m=\u001b[39m \u001b[43mPromptOptimizer\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[43m \u001b[49m\u001b[43mllm\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mllm\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7\u001b[0m \u001b[43m \u001b[49m\u001b[43mbenchmark\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mhotpotqa\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 8\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# kwargs.\u001b[39;49;00m\n\u001b[1;32m 9\u001b[0m \u001b[43m \u001b[49m\u001b[43mmax_steps\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m8\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 10\u001b[0m \u001b[43m \u001b[49m\u001b[43mmax_tokens\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m5000\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 11\u001b[0m \u001b[43m \u001b[49m\u001b[43menc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtiktoken\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mencoding_for_model\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mgpt-3.5-turbo\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 12\u001b[0m \u001b[43m \u001b[49m\u001b[43mdocstore\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mDocstoreExplorer\u001b[49m\u001b[43m(\u001b[49m\u001b[43mWikipedia\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 13\u001b[0m \u001b[43m\t\u001b[49m\u001b[43mmax_actions_per_step\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m3\u001b[39;49m\n\u001b[1;32m 14\u001b[0m \u001b[43m)\u001b[49m\n\u001b[1;32m 16\u001b[0m out \u001b[38;5;241m=\u001b[39m agent\u001b[38;5;241m.\u001b[39mgenerate(\n\u001b[1;32m 17\u001b[0m question\u001b[38;5;241m=\u001b[39mquestion,\n\u001b[1;32m 18\u001b[0m examples\u001b[38;5;241m=\u001b[39mHOTPOTQA_FEWSHOT_EXAMPLES_REACT, \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 21\u001b[0m reset\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m,\n\u001b[1;32m 22\u001b[0m )\n",
"Cell \u001b[0;32mIn[2], line 5\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01magential\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mtraining\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01magent_optimizer\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01magent\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m PromptOptimizer\n\u001b[1;32m 3\u001b[0m question \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mWho was once considered the best kick boxer in the world, however he has been involved in a number of controversies relating to his \u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124munsportsmanlike conducts\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m in the sport and crimes of violence outside of the ring\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m----> 5\u001b[0m agent \u001b[38;5;241m=\u001b[39m \u001b[43mPromptOptimizer\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[43m \u001b[49m\u001b[43mllm\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mllm\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7\u001b[0m \u001b[43m \u001b[49m\u001b[43mbenchmark\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mhotpotqa\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 8\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# kwargs.\u001b[39;49;00m\n\u001b[1;32m 9\u001b[0m \u001b[43m \u001b[49m\u001b[43mmax_steps\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m8\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 10\u001b[0m \u001b[43m \u001b[49m\u001b[43mmax_tokens\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m5000\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 11\u001b[0m \u001b[43m \u001b[49m\u001b[43menc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtiktoken\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mencoding_for_model\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mgpt-3.5-turbo\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 12\u001b[0m \u001b[43m \u001b[49m\u001b[43mdocstore\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mDocstoreExplorer\u001b[49m\u001b[43m(\u001b[49m\u001b[43mWikipedia\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 13\u001b[0m \u001b[43m\t\u001b[49m\u001b[43mmax_actions_per_step\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m3\u001b[39;49m\n\u001b[1;32m 14\u001b[0m \u001b[43m)\u001b[49m\n\u001b[1;32m 16\u001b[0m out \u001b[38;5;241m=\u001b[39m agent\u001b[38;5;241m.\u001b[39mgenerate(\n\u001b[1;32m 17\u001b[0m question\u001b[38;5;241m=\u001b[39mquestion,\n\u001b[1;32m 18\u001b[0m examples\u001b[38;5;241m=\u001b[39mHOTPOTQA_FEWSHOT_EXAMPLES_REACT, \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 21\u001b[0m reset\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m,\n\u001b[1;32m 22\u001b[0m )\n",
"File \u001b[0;32m~/agential/agential/training/agent_optimizer/agent.py:140\u001b[0m, in \u001b[0;36mPromptOptimizer.__init__\u001b[0;34m(self, llm, benchmark, testing, **strategy_kwargs)\u001b[0m\n\u001b[1;32m 137\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Initialization.\"\"\"\u001b[39;00m\n\u001b[1;32m 138\u001b[0m \u001b[38;5;28msuper\u001b[39m()\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__init__\u001b[39m(llm\u001b[38;5;241m=\u001b[39mllm, benchmark\u001b[38;5;241m=\u001b[39mbenchmark, testing\u001b[38;5;241m=\u001b[39mtesting)\n\u001b[0;32m--> 140\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstrategy \u001b[38;5;241m=\u001b[39m \u001b[43mPromptOptimizer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_strategy\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 141\u001b[0m \u001b[43m \u001b[49m\u001b[43mbenchmark\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbenchmark\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 142\u001b[0m \u001b[43m \u001b[49m\u001b[43mllm\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mllm\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 143\u001b[0m \u001b[43m \u001b[49m\u001b[43mtesting\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtesting\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 144\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mstrategy_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 145\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/agential/agential/training/agent_optimizer/agent.py:205\u001b[0m, in \u001b[0;36mPromptOptimizer.get_strategy\u001b[0;34m(benchmark, **kwargs)\u001b[0m\n\u001b[1;32m 202\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mUnsupported benchmark: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mbenchmark\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m for agent ReAct\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 204\u001b[0m strategy \u001b[38;5;241m=\u001b[39m REACT_STRATEGIES[benchmark]\n\u001b[0;32m--> 205\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mstrategy\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
"\u001b[0;31mTypeError\u001b[0m: PromptOptimizerQAStrategy.__init__() got an unexpected keyword argument 'max_actions_per_step'"
Expand Down Expand Up @@ -155,7 +155,7 @@
"source": [
"question = \"Nikolaj Coster-Waldau worked with the Fox Broadcasting Company.\"\n",
"\n",
"agent = ReAct(\n",
"agent = PromptOptimizer(\n",
" llm=llm,\n",
" benchmark=\"fever\",\n",
" # kwargs.\n",
Expand All @@ -168,7 +168,7 @@
"out = agent.generate(\n",
" question=question,\n",
" examples=FEVER_FEWSHOT_EXAMPLES_REACT,\n",
" prompt=REACT_INSTRUCTION_FEVER,\n",
" prompt=PROMPT_OPTIMIZER_INSTRUCTION_FEVER,\n",
" additional_keys={},\n",
" reset=True,\n",
")"
Expand Down Expand Up @@ -198,7 +198,7 @@
"source": [
"question = \"When did the simpsons first air on television?\"\n",
"\n",
"agent = ReAct(\n",
"agent = PromptOptimizer(\n",
" llm=llm,\n",
" benchmark=\"ambignq\",\n",
" # kwargs.\n",
Expand All @@ -211,7 +211,7 @@
"out = agent.generate(\n",
" question=question,\n",
" examples=AMBIGNQ_FEWSHOT_EXAMPLES_REACT,\n",
" prompt=REACT_INSTRUCTION_AMBIGNQ,\n",
" prompt=PROMPT_OPTIMIZER_INSTRUCTION_AMBIGNQ,\n",
" additional_keys={},\n",
" reset=True,\n",
")"
Expand Down Expand Up @@ -241,7 +241,7 @@
"source": [
"question = \"Which American-born Sinclair won the Nobel Prize for Literature in 1930?\"\n",
"\n",
"agent = ReAct(\n",
"agent = PromptOptimizer(\n",
" llm=llm,\n",
" benchmark=\"triviaqa\",\n",
" # kwargs.\n",
Expand All @@ -254,7 +254,7 @@
"out = agent.generate(\n",
" question=question,\n",
" examples=TRIVIAQA_FEWSHOT_EXAMPLES_REACT,\n",
" prompt=REACT_INSTRUCTION_TRIVIAQA,\n",
" prompt=PROMPT_OPTIMIZER_INSTRUCTION_TRIVIAQA,\n",
" additional_keys={},\n",
" reset=True,\n",
")"
Expand Down Expand Up @@ -291,7 +291,7 @@
"source": [
"question = \"Janet's ducks lay 16 eggs per day. She eats three for breakfast every morning and bakes muffins for her friends every day with 4933828. She sells the remainder at the farmers' market daily for $2 per fresh duck egg. How much in dollars does she make every day at the farmers' market?\"\n",
"\n",
"agent = ReAct(\n",
"agent = PromptOptimizer(\n",
" llm=llm, \n",
" benchmark=\"gsm8k\",\n",
" # kwargs.\n",
Expand All @@ -303,7 +303,7 @@
"out = agent.generate(\n",
" question=question,\n",
" examples=GSM8K_FEWSHOT_EXAMPLES_REACT,\n",
" prompt=REACT_INSTRUCTION_GSM8K,\n",
" prompt=PROMPT_OPTIMIZER_INSTRUCTION_GSM8K,\n",
" additional_keys={},\n",
" reset=True,\n",
")"
Expand Down Expand Up @@ -333,7 +333,7 @@
"source": [
"question = \"There are 87 oranges and 290 bananas in Philip's collection. If the bananas are organized into 2 groups and oranges are organized into 93 groups. How big is each group of bananas?\"\n",
"\n",
"agent = ReAct(\n",
"agent = PromptOptimizer(\n",
" llm=llm, \n",
" benchmark=\"svamp\",\n",
" # kwargs.\n",
Expand All @@ -345,7 +345,7 @@
"out = agent.generate(\n",
" question=question,\n",
" examples=SVAMP_FEWSHOT_EXAMPLES_REACT,\n",
" prompt=REACT_INSTRUCTION_SVAMP,\n",
" prompt=PROMPT_OPTIMIZER_INSTRUCTION_SVAMP,\n",
" additional_keys={},\n",
" reset=True,\n",
")"
Expand Down Expand Up @@ -383,7 +383,7 @@
"\n",
"Question: Some friends went bowling and kept track of their scores. How many more points did Mike score than Irma?\"\"\"\n",
"\n",
"agent = ReAct(\n",
"agent = PromptOptimizer(\n",
" llm=llm, \n",
" benchmark=\"tabmwp\",\n",
" # kwargs.\n",
Expand All @@ -395,7 +395,7 @@
"out = agent.generate(\n",
" question=question,\n",
" examples=TABMWP_FEWSHOT_EXAMPLES_REACT,\n",
" prompt=REACT_INSTRUCTION_TABMWP,\n",
" prompt=PROMPT_OPTIMIZER_INSTRUCTION_TABMWP,\n",
" additional_keys={},\n",
" reset=True,\n",
")"
Expand Down Expand Up @@ -434,7 +434,7 @@
"question = inst['prompt']\n",
"tests = f\"{inst['test']}\\ncheck({inst['entry_point']})\"\n",
"\n",
"agent = ReAct(\n",
"agent = PromptOptimizer(\n",
" llm=llm,\n",
" benchmark=\"humaneval\",\n",
" # kwargs.\n",
Expand All @@ -446,7 +446,7 @@
"out = agent.generate(\n",
" question=question,\n",
" examples=HUMANEVAL_FEWSHOT_EXAMPLES_REACT,\n",
" prompt=REACT_INSTRUCTION_HUMANEVAL,\n",
" prompt=PROMPT_OPTIMIZER_INSTRUCTION_HUMANEVAL,\n",
" additional_keys={},\n",
" reset=True,\n",
")"
Expand Down Expand Up @@ -479,7 +479,7 @@
"assert first_repeated_char(\"abc\") == None\n",
"assert first_repeated_char(\"123123\") == \"1\\\"\"\"\"\n",
"\n",
"agent = ReAct(\n",
"agent = PromptOptimizer(\n",
" llm=llm,\n",
" benchmark=\"mbpp\",\n",
" # kwargs.\n",
Expand All @@ -491,7 +491,7 @@
"out = agent.generate(\n",
" question=question,\n",
" examples=MBPP_FEWSHOT_EXAMPLES_REACT,\n",
" prompt=REACT_INSTRUCTION_MBPP,\n",
" prompt=PROMPT_OPTIMIZER_INSTRUCTION_MBPP,\n",
" additional_keys={\"tests\": tests},\n",
" reset=True,\n",
")"
Expand Down

0 comments on commit 4e86d31

Please sign in to comment.