added graph_rag_test_data generation module using ragas
Commit 5262d42 (1 parent: 0cdf0ec)
Showing 2 changed files with 160 additions and 0 deletions.
@@ -0,0 +1,85 @@
from prompts import *
import pandas as pd
import random
from tqdm.auto import tqdm
import requests


def get_response(prompt: str,
                 url: str = "http://localhost:11434/api/generate",
                 model: str = "llama3"):
    """Send a prompt to a local Ollama-style /api/generate endpoint and return the generated text."""
    payload = {
        "model": model,
        "prompt": prompt,
        "stream": False,
    }
    response = requests.post(url, json=payload)
    resp = response.json()
    return resp["response"]


def qa_generator(documents: list, N_GENERATIONS: int = 20):
    """Sample N_GENERATIONS document chunks and ask the LLM for one factoid question/answer pair per chunk."""
    print(f"Generating {N_GENERATIONS} QA couples...")

    outputs = []
    for sampled_context in tqdm(random.sample(documents, N_GENERATIONS)):
        # Generate a QA couple from the sampled chunk's text.
        output_QA_couple = get_response(QA_generation_prompt.format(context=sampled_context.text))
        try:
            question = output_QA_couple.split("Factoid question: ")[-1].split("Answer: ")[0]
            answer = output_QA_couple.split("Answer: ")[-1]
            assert len(answer) < 300, "Answer is too long"
            outputs.append(
                {
                    "context": sampled_context.text,
                    "question": question,
                    "answer": answer,
                    "source_doc": sampled_context.metadata,
                }
            )
        except Exception:
            # Skip generations that do not follow the expected output format.
            continue
    df = pd.DataFrame(outputs)
    df.to_csv("QA.csv")
    return outputs


def critique_qa(outputs: list):
    """Score each QA couple on groundedness, relevance and standalone quality, keeping those rated >= 4 on all three."""
    print("Generating critique for each QA couple...")
    for output in tqdm(outputs):
        evaluations = {
            "groundedness": get_response(
                question_groundedness_critique_prompt.format(context=output["context"], question=output["question"]),
            ),
            "relevance": get_response(
                question_relevance_critique_prompt.format(question=output["question"]),
            ),
            "standalone": get_response(
                question_standalone_critique_prompt.format(question=output["question"]),
            ),
        }
        try:
            for criterion, evaluation in evaluations.items():
                # Each critique is expected to end with "Evaluation: ... Total rating: <1-5>".
                score, eval_text = (
                    int(evaluation.split("Total rating: ")[-1].strip()),
                    evaluation.split("Total rating: ")[-2].split("Evaluation: ")[1],
                )
                output.update(
                    {
                        f"{criterion}_score": score,
                        f"{criterion}_eval": eval_text,
                    }
                )
        except Exception:
            # Skip QA couples whose critiques cannot be parsed.
            continue
    generated_questions = pd.DataFrame.from_dict(outputs)
    generated_questions = generated_questions.loc[
        (generated_questions["groundedness_score"] >= 4)
        & (generated_questions["relevance_score"] >= 4)
        & (generated_questions["standalone_score"] >= 4)
    ]
    generated_questions.to_csv("generated_questions.csv")
    return outputs
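The module above assumes `documents` is a sequence of chunk objects exposing `.text` and `.metadata` (for example LlamaIndex- or LangChain-style nodes) and that an Ollama server is listening on localhost:11434. A minimal, hypothetical driver — the `Document` wrapper, the import path and the sample texts are assumptions for illustration, not part of the commit — might look like this:

from dataclasses import dataclass

# Hypothetical import path; the commit does not show the module's file name.
from graph_rag_test_data import qa_generator, critique_qa


@dataclass
class Document:
    # Minimal stand-in for a chunk object with the attributes the module reads.
    text: str
    metadata: dict


docs = [
    Document(text="Paris is the capital of France.", metadata={"source": "geo.txt"}),
    Document(text="The Eiffel Tower was completed in 1889.", metadata={"source": "geo.txt"}),
    Document(text="The Seine flows through Paris.", metadata={"source": "geo.txt"}),
]

# Generate raw QA couples (also written to QA.csv as a side effect).
qa_pairs = qa_generator(docs, N_GENERATIONS=3)

# Score each couple and write the filtered set to generated_questions.csv.
critique_qa(qa_pairs)

Note that qa_generator needs len(documents) >= N_GENERATIONS, since random.sample draws without replacement. The prompt templates pulled in by `from prompts import *` (QA_generation_prompt and the three critique prompts) are defined in the second changed file, below.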
@@ -0,0 +1,75 @@
QA_generation_prompt = """
Your task is to write a factoid question and an answer given a context.
Your factoid question should be answerable with a specific, concise piece of factual information from the context.
Your factoid question should be formulated in the same style as questions users could ask in a search engine.
This means that your factoid question MUST NOT mention something like "according to the passage" or "context".
Provide your answer as follows:
Output:::
Factoid question: (your factoid question)
Answer: (your answer to the factoid question)
Now here is the context.
Context: {context}\n
Output:::"""


question_groundedness_critique_prompt = """
You will be given a context and a question.
Your task is to provide a 'total rating' scoring how well one can answer the given question unambiguously with the given context.
Give your answer on a scale of 1 to 5, where 1 means that the question is not answerable at all given the context, and 5 means that the question is clearly and unambiguously answerable with the context.
Provide your answer as follows:
Answer:::
Evaluation: (your rationale for the rating, as a text)
Total rating: (your rating, as a number between 1 and 5)
You MUST provide values for 'Evaluation:' and 'Total rating:' in your answer.
Now here are the question and context.
Question: {question}\n
Context: {context}\n
Answer::: """


question_relevance_critique_prompt = """
You will be given a question.
Your task is to provide a 'total rating' representing how useful this question can be to machine learning developers building NLP applications with the Hugging Face ecosystem.
Give your answer on a scale of 1 to 5, where 1 means that the question is not useful at all, and 5 means that the question is extremely useful.
Provide your answer as follows:
Answer:::
Evaluation: (your rationale for the rating, as a text)
Total rating: (your rating, as a number between 1 and 5)
You MUST provide values for 'Evaluation:' and 'Total rating:' in your answer.
Now here is the question.
Question: {question}\n
Answer::: """


question_standalone_critique_prompt = """
You will be given a question.
Your task is to provide a 'total rating' representing how context-independent this question is.
Give your answer on a scale of 1 to 5, where 1 means that the question depends on additional information to be understood, and 5 means that the question makes sense by itself.
For instance, if the question refers to a particular setting, like 'in the context' or 'in the document', the rating must be 1.
The questions can contain obscure technical nouns or acronyms like Gradio, Hub, Hugging Face or Space and still be a 5: it must simply be clear to an operator with access to documentation what the question is about.
For instance, "What is the name of the checkpoint from which the ViT model is imported?" should receive a 1, since there is an implicit mention of a context, thus the question is not independent of the context.
Provide your answer as follows:
Answer:::
Evaluation: (your rationale for the rating, as a text)
Total rating: (your rating, as a number between 1 and 5)
You MUST provide values for 'Evaluation:' and 'Total rating:' in your answer.
Now here is the question.
Question: {question}\n
Answer::: """
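Each critique prompt asks the model to reply in the fixed "Evaluation: ... Total rating: ..." layout, which is exactly what the string splitting in critique_qa relies on. A small sketch of that round-trip follows; the model reply shown is invented purely for illustration and is not output from the commit's code:

# An invented reply in the format the critique prompts request.
fake_reply = (
    "Answer:::\n"
    "Evaluation: The context states the answer explicitly, so the question is fully answerable.\n"
    "Total rating: 5"
)

# The same parsing used in critique_qa: the text after the last "Total rating: "
# is the score, and the segment between "Evaluation: " and "Total rating: " is the rationale.
score = int(fake_reply.split("Total rating: ")[-1].strip())
rationale = fake_reply.split("Total rating: ")[-2].split("Evaluation: ")[1]

print(score)      # 5
print(rationale)  # "The context states the answer explicitly, so the question is fully answerable."

If a reply drifts from this format (for example, it omits "Total rating:" or puts text after the number), the int() conversion or the index lookup raises, and that QA couple is silently skipped — which is why the parsing loop in critique_qa is wrapped in try/except.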