Code environment (#38)
Co-authored-by: nik <[email protected]>
niklub and nik authored Nov 20, 2023
1 parent 919262c commit 6b521cb
Showing 8 changed files with 428 additions and 85 deletions.
153 changes: 105 additions & 48 deletions adala/agents/base.py
@@ -7,7 +7,7 @@
from adala.runtimes.base import Runtime
from adala.runtimes._openai import OpenAIChatRuntime
from adala.runtimes import GuidanceRuntime
from adala.skills._base import Skill
from adala.skills._base import Skill, AnalysisSkill, TransformSkill, SynthesisSkill
from adala.memories.base import Memory
from adala.skills.skillset import SkillSet, LinearSkillSet
from adala.utils.logs import print_dataframe, print_text, print_error
@@ -218,11 +218,102 @@ def select_skill_to_train(self, feedback: EnvironmentFeedback, accuracy_threshol

return train_skill_name, train_skill_output, acc_score

def pe_optimization(self, skill, examples, teacher_runtime):
# system messages
messages = [{'role': 'system', 'content': 'You are a helpful assistant.'}]

messages += [{
'role': 'user',
'content': '''
A prompt is a text paragraph that outlines the expected actions and instructs the model to \
generate a specific output. This prompt is concatenated with the input text, and the \
model then creates the required output. Formally, the prompt is a prefix to the input:
output = model(concatenate(prompt, input))
The model can produce erroneous output if the prompt is not well defined. \
In our collaboration, we’ll work together to refine a prompt. The process consists of two main steps:
## Step 1
I will provide you with the current prompt, how the prompt is concatenated with the input text
(i.e., "full template"), along with some example(s) that are associated with
this prompt. Each example contains the input, the final answer produced by the model, and the user feedback.
Your task is to analyze the examples, determining whether the
existing prompt describes the task reflected by these examples precisely, and suggest
changes to the prompt.
## Step 2
Next, you will carefully review your reasoning in step 1, integrate the insights to craft a
new, optimized prompt.'''
}]

messages += [{
'role': 'assistant',
'content': 'Sure, I’d be happy to help you with this prompt engineering problem. '
'Please provide me with the current prompt, the full template, and the examples.'
}]

messages += [{
'role': 'user',
'content': f'''
## Current Prompt
{skill.instructions}
## Full Template
{{current prompt}}
{skill.input_template}
{skill.output_template}
## Examples
{examples}
## Instructions
For some of these examples, the user feedback points out that the model is not producing the correct output. \
This may be due to the prompt being misleading or not describing the task precisely.
Please examine the example(s) carefully. Note that the user feedback should be considered as ground truth, but \
the prompts (task descriptions) may be incorrect and need modification.
For each example, provide reasoning according to the following template:
### Example <id>
Input: <input>
Output: <output>
Feedback: <feedback>
Is the output correct according to the feedback: <yes or no, and your reasoning>
To output the correct label, is it necessary to edit the prompt: <yes or no, and your
reasoning>
If yes, provide detailed analysis and actionable suggestions to edit the prompt: <analysis and
suggestions>
'''}]

reasoning = teacher_runtime.execute(messages)

messages += [
{'role': 'assistant', 'content': reasoning},
{'role': 'user', 'content': f'''
Now please carefully review your reasoning in Step 1 and help with Step 2: refining the prompt.
## Current prompt
{skill.instructions}
## Instructions
- The new prompt should be concise and direct.
- The new prompt should describe the task precisely and address the points raised in the user feedback.
- Include a few examples in the prompt to help the model learn the task, by providing inputs and outputs that follow the full template.
- Reply only with the prompt. Do not include other text.
'''}]

print(messages)
new_prompt = teacher_runtime.execute(messages)
return new_prompt

def learn(
self,
learning_iterations: int = 3,
accuracy_threshold: float = 0.9,
update_memory: bool = True,
batch_size: Optional[int] = None,
num_feedbacks: Optional[int] = None,
runtime: Optional[str] = None,
teacher_runtime: Optional[str] = None,
@@ -242,36 +333,17 @@ def learn(
runtime = self.get_runtime(runtime=runtime)
teacher_runtime = self.get_teacher_runtime(runtime=teacher_runtime)

messages = [
{'role': 'system',
'content': "Act as LLM instructions generator. Full LLM prompt is created by concatenating LLM instructions and input text. "
"You should respond only with the LLM instructions. After each generation, user provides "
"a feedback which includes example of input text, LLM output, and user feedback. "
"Based on this analysis, generate new "
"instructions for the LLM. These instructions should be concise, direct, and "
"focused solely on addressing the points raised in the user feedback, "
"aligning with the input, and improving upon the predictions. "
"Include relevant few-shot examples within the instructions that are aligned "
"with the user's feedback and the initial input, demonstrating the desired "
"format and approach for the LLM’s prediction. These examples should serve "
"as clear models for the expected output in the next iteration."}
]

skill_accuracies = {}

for iteration in range(learning_iterations):

print_text(f'\n\n=> Iteration #{iteration}: Getting feedback, analyzing and improving ...')

inputs = self.environment.get_data_batch()

inputs = self.environment.get_data_batch(batch_size=batch_size)
predictions = self.skills.apply(inputs, runtime=runtime)

feedback = self.environment.get_feedback(self.skills, predictions, num_feedbacks=num_feedbacks)
print('Predictions and feedback:')
fb = feedback.feedback.rename(columns=lambda x: x + '__fb' if x in predictions.columns else x)
print_dataframe(InternalDataFrameConcat([predictions, feedback.match, fb], axis=1))

analyzed_df = fb.merge(predictions, left_index=True, right_index=True)
print_dataframe(analyzed_df)
train_skill_name, train_skill_output, accuracy = self.select_skill_to_train(feedback, accuracy_threshold)
if not train_skill_name:
print_text(f'No skill to improve found. Continue learning...')
@@ -280,34 +352,19 @@ def learn(
print_text(f'Output to improve: "{train_skill_output}" (Skill="{train_skill_name}")\n'
f'Accuracy = {accuracy * 100:0.2f}%', style='bold red')

# collect user feedback
messages.append({'role': 'assistant', 'content': train_skill.instructions})
message = ''
if train_skill_output in skill_accuracies and accuracy <= skill_accuracies[train_skill_output]:
message += 'The quality of the proposed instructions has not improved. ' \
'Carefully analyze the feedback and do your best to improve the instructions. '
skill_accuracies[train_skill_output] = accuracy
message += 'Here is the feedback based on the current instructions:\n\n'
for row in InternalDataFrameConcat((predictions, fb), axis=1).to_dict(orient='records'):
examples = []

for row in analyzed_df.to_dict(orient='records'):
# if fb marked as NaN, skip
if not row[f'{train_skill_output}__fb']:
continue
message = f'{message}\n' \
f'{train_skill.input_template.format(**row)}\n' \
f'{train_skill.output_template.format(**row)}\n' \
f'{row[f"{train_skill_output}__fb"]}\n\n'

messages.append({
'role': 'user',
'content': f'{message}\n\n'
f'Please address the feedback and provide new improved instructions for the LLM. '
f'Use the following format for the few-shot examples:\n\n'
f'{train_skill.input_template}\n'
f'{train_skill.output_template}\n\n'
f'Carefully analyze this feedback, and provide updated prompting instructions for LLM:'
})
# print(messages)
new_instructions = teacher_runtime.execute(messages)
examples.append(
f'{train_skill.input_template.format(**row)}\n'
f'{train_skill.output_template.format(**row)}\n'
f'Feedback: {row[f"{train_skill_output}__fb"]}\n\n'
)

new_instructions = self.pe_optimization(train_skill, '\n'.join(examples), teacher_runtime)
train_skill.instructions = new_instructions
print_text(f'{train_skill.instructions}', style='bold green')

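
For reference, a self-contained sketch of the prediction/feedback merge that learn() now performs, assuming InternalDataFrame is a thin pandas DataFrame alias; the sample data is invented for illustration and is not part of this commit:

import pandas as pd

predictions = pd.DataFrame({'text': ['2+2', '3*3'], 'answer': ['4', '10']})
feedback = pd.DataFrame({'answer': ['Correct.', 'Wrong: 3*3 is 9.']})

# As in learn(): feedback columns that collide with prediction columns get a
# '__fb' suffix, then feedback and predictions are merged on the index.
fb = feedback.rename(columns=lambda x: x + '__fb' if x in predictions.columns else x)
analyzed_df = fb.merge(predictions, left_index=True, right_index=True)
print(analyzed_df)
# roughly:
#          answer__fb text answer
# 0          Correct.  2+2      4
# 1  Wrong: 3*3 is 9.  3*3     10

Each surviving row is then rendered with the skill's input/output templates plus its feedback and passed to pe_optimization() as the examples string.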
3 changes: 2 additions & 1 deletion adala/environments/__init__.py
@@ -1,3 +1,4 @@
from .base import Environment, StaticEnvironment
from .console import ConsoleEnvironment
from .web import WebStaticEnvironment
from .web import WebStaticEnvironment
from .code import SimpleCodeValidationEnvironment
73 changes: 73 additions & 0 deletions adala/environments/code.py
@@ -0,0 +1,73 @@
import sys
import io
from contextlib import redirect_stdout, redirect_stderr
from typing import Dict, Optional, List
from .base import StaticEnvironment, EnvironmentFeedback
from adala.skills import SkillSet
from adala.utils.internal_data import InternalDataFrame


class redirect_stdin:
"""Context manager for redirecting stdin to a given StringIO object."""
def __init__(self, new_target):
self.new_target = new_target
self._original_stdin = sys.stdin

def __enter__(self):
sys.stdin = self.new_target

def __exit__(self, exc_type, exc_value, traceback):
sys.stdin = self._original_stdin


class SimpleCodeValidationEnvironment(StaticEnvironment):

code_template: str = None
code_fields: Dict[str, str] = None

def execute_code(self, code: str, input_string) -> Dict:
out = {'success': False}
stdout = io.StringIO()
stderr = io.StringIO()
stdin = io.StringIO(input_string)

try:
with redirect_stdin(stdin), redirect_stdout(stdout), redirect_stderr(stderr):
exec(code, {"__builtins__": __builtins__})
out['success'] = True
except Exception as e:
stderr.write(str(e))

out['stdout'] = stdout.getvalue()
out['stderr'] = stderr.getvalue()
return out

def get_feedback(
self,
skills: SkillSet,
predictions: InternalDataFrame,
num_feedbacks: Optional[int] = None,
) -> EnvironmentFeedback:

if num_feedbacks:
predictions = predictions.sample(n=num_feedbacks)

code_match, code_feedback = {}, {}
for code_field, code_input in self.code_fields.items():
match, feedback = [], []
for data_row in predictions.to_dict(orient='records'):
res = self.execute_code(data_row[code_field], data_row[code_input])
if res['success']:
match.append(True)
feedback.append(f'Code is valid. Output:\n{res["stdout"]}')
else:
match.append(False)
feedback.append(f'Code is invalid:\n{res["stderr"]}')

code_match[code_field] = match
code_feedback[code_field] = feedback

return EnvironmentFeedback(
match=InternalDataFrame(code_match, index=predictions.index),
feedback=InternalDataFrame(code_feedback, index=predictions.index)
)
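
For reference, a minimal usage sketch of the new environment; code_fields and execute_code come from this file, while the df constructor field is an assumption about StaticEnvironment and not part of this diff:

import pandas as pd

# Assumption: StaticEnvironment exposes the data as a `df` field; adjust to its
# actual constructor if it differs.
env = SimpleCodeValidationEnvironment(
    df=pd.DataFrame({'code': ['print(input()[::-1])'], 'stdin': ['hello']}),
    code_fields={'code': 'stdin'},  # column holding code -> column holding its stdin
)

result = env.execute_code("print(input()[::-1])", "hello")
print(result)  # roughly: {'success': True, 'stdout': 'olleh\n', 'stderr': ''}

get_feedback() then runs each predicted code snippet against its paired stdin column and reports either success with the captured stdout or failure with the captured stderr.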
6 changes: 4 additions & 2 deletions adala/skills/_base.py
@@ -194,11 +194,13 @@ def apply(
).str.cat(sep='\n')

output = runtime.record_to_record(
{'aggregated_input': aggregated_input},
input_template='{aggregated_input}',
{'input': aggregated_input},
input_template='{input}',
output_template=self.output_template,
instructions_template=self.instructions,
field_schema=self.field_schema,
extra_fields=self._get_extra_fields(),
)
# output['input'] = aggregated_input
# concatenate input and output and return dataframe
return InternalSeries(output)
22 changes: 12 additions & 10 deletions adala/skills/skillset.py
@@ -5,7 +5,7 @@
from adala.runtimes.base import Runtime
from adala.utils.logs import print_text
from adala.utils.internal_data import InternalDataFrame, InternalSeries, InternalDataFrameConcat, Record
from ._base import Skill
from ._base import Skill, TransformSkill, AnalysisSkill, SynthesisSkill


class SkillSet(BaseModel, ABC):
@@ -171,7 +171,7 @@ def apply(
# use input dataset for the first node in the pipeline
print_text(f"Applying skill: {skill_name}")
skill_output = skill.apply(skill_input, runtime)
if isinstance(skill_output, InternalDataFrame) and isinstance(skill_input, InternalDataFrame):
if isinstance(skill, TransformSkill):
# Columns to drop from skill_input because they are also in skill_output
cols_to_drop = set(skill_output.columns) & set(skill_input.columns)
skill_input_reduced = skill_input.drop(columns=cols_to_drop)
@@ -182,15 +182,12 @@ def apply(
right_index=True,
how='inner'
)
elif isinstance(skill_output, InternalDataFrame) and isinstance(skill_input, dict):
elif isinstance(skill, (AnalysisSkill, SynthesisSkill)):
skill_input = skill_output
elif isinstance(skill_output, dict) and isinstance(skill_input, InternalDataFrame):
skill_input = skill_output
elif isinstance(skill_output, dict) and isinstance(skill_input, dict):
skill_input = dict(skill_output, **skill_input)
else:
raise ValueError(f"Unsupported input type: {type(skill_input)} and output type: {type(skill_output)}")

raise ValueError(f"Unsupported skill type: {type(skill)}")
if isinstance(skill_input, InternalSeries):
skill_input = skill_input.to_frame().T
return skill_input

def __rich__(self):
@@ -261,6 +258,11 @@ def apply(
how='inner'
)
elif isinstance(skill_outputs[0], (dict, InternalSeries)):
return InternalDataFrame(skill_outputs)
# concatenate output to each row of input
output = skill_outputs[0]
return InternalDataFrameConcat(
[input,
InternalDataFrame([output] * len(input), columns=output.index, index=input.index)],
axis=1)
else:
raise ValueError(f"Unsupported output type: {type(skill_outputs[0])}")
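
For reference, a self-contained sketch of the new AnalysisSkill/SynthesisSkill output handling above, with pandas standing in for InternalDataFrame and InternalSeries and invented sample data: a single batch-level output is repeated for every input row and concatenated column-wise.

import pandas as pd

input_df = pd.DataFrame({'text': ['a', 'b', 'c']})    # `input` in the diff
output = pd.Series({'summary': 'three short texts'})  # skill_outputs[0]

result = pd.concat(
    [input_df,
     pd.DataFrame([output] * len(input_df), columns=output.index, index=input_df.index)],
    axis=1)
print(result)
# roughly:
#   text            summary
# 0    a  three short texts
# 1    b  three short texts
# 2    c  three short texts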