From c83daa626daf35351796877e58d6f67d9104d43d Mon Sep 17 00:00:00 2001 From: yasufumi Date: Fri, 17 Mar 2023 14:22:36 +0900 Subject: [PATCH 1/5] Add the new task type Task Free --- doccano_mini/tasks.py | 1 + 1 file changed, 1 insertion(+) diff --git a/doccano_mini/tasks.py b/doccano_mini/tasks.py index 8dbea17..c00fb5c 100644 --- a/doccano_mini/tasks.py +++ b/doccano_mini/tasks.py @@ -3,6 +3,7 @@ class TaskType(Enum): TEXT_CLASSIFICATION = "Text Classification" + TASK_FREE = "Task Free" options = [task_type.value for task_type in TaskType] From f983b74f4bea04b9a8f7696b1a26cb70f0212296 Mon Sep 17 00:00:00 2001 From: yasufumi Date: Fri, 17 Mar 2023 14:23:32 +0900 Subject: [PATCH 2/5] Add a task free annotation --- doccano_mini/app.py | 76 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 66 insertions(+), 10 deletions(-) diff --git a/doccano_mini/app.py b/doccano_mini/app.py index dd726c5..8d0161e 100644 --- a/doccano_mini/app.py +++ b/doccano_mini/app.py @@ -1,10 +1,13 @@ +import pandas as pd import streamlit as st from langchain.chains import LLMChain from langchain.llms import OpenAI +from langchain.prompts.few_shot import FewShotPromptTemplate +from langchain.prompts.prompt import PromptTemplate from doccano_mini.examples import select_example_df from doccano_mini.prompts import select_prompt_maker -from doccano_mini.tasks import options +from doccano_mini.tasks import TaskType, options CODE = """from langchain.chains import load_chain @@ -22,14 +25,7 @@ def display_download_button(): ) -def main(): - st.set_page_config(page_title="doccano-mini", page_icon=":memo:") - - st.title("doccano-mini") - - st.header("Select your task") - task = st.selectbox("", options=options, label_visibility="collapsed") - +def task_classification(task: TaskType): st.header("Annotate your data") df = select_example_df(task) edited_df = st.experimental_data_editor(df, num_rows="dynamic", width=1000) @@ -64,7 +60,7 @@ def main(): st.markdown(f"```\n{prompt.format(input=text)}\n```") if st.button("Predict"): - llm = OpenAI(model_name=model_name, temperature=temperature, top_p=top_p) + llm = OpenAI(model_name=model_name, temperature=temperature, top_p=top_p) # type:ignore chain = LLMChain(llm=llm, prompt=prompt) response = chain.run(text) label = response.split(":")[1] @@ -73,6 +69,66 @@ def main(): chain.save("config.yaml") display_download_button() + +def main(): + st.set_page_config(page_title="doccano-mini", page_icon=":memo:") + + st.title("doccano-mini") + + st.header("Select your task") + task = st.selectbox("", options=options, label_visibility="collapsed") + + if task == "Text Classification": + task_classification(task) + else: + num_cols = st.number_input("Set the number of columns", min_value=2, max_value=10) + columns = [st.text_input(f"Column {i}:", value=f"column {i}") for i in range(1, num_cols + 1)] + + df = pd.DataFrame({column: [] for column in columns}, dtype=str) + edited_df = st.experimental_data_editor(df, num_rows="dynamic", width=1000) + + example_prompt = PromptTemplate( + input_variables=columns, template="\n".join([f"{column}: {{{column}}}" for column in columns]) + ) + + prefix = st.text_area( + label="Enter task instruction", placeholder="Predict column 2 based on column 1", height=200 + ) + suffix = "\n".join([f"{column}: {{{column}}}" for column in columns[:-1]]) + + prompt = FewShotPromptTemplate( + examples=edited_df.to_dict(orient="records"), + example_prompt=example_prompt, + prefix=prefix, + suffix=suffix, + input_variables=columns[:-1], + ) + + inputs = [st.text_input(f"Input for {columns[i]}:") for i in range(num_cols - 1)] + + st.markdown(f"```\n{prompt.format(**dict(zip(columns, inputs)))}\n```") + + available_models = ( + "gpt-3.5-turbo", + "gpt-3.5-turbo-0301", + "text-davinci-003", + "text-davinci-002", + "code-davinci-002", + ) + # Use text-davinci-003 by default. + model_name = st.selectbox("Model", available_models, index=2) + temperature = st.slider("Temperature", min_value=0.0, max_value=1.0, value=0.7, step=0.01) + top_p = st.slider("Top-p", min_value=0.0, max_value=1.0, value=1.0, step=0.01) + if st.button("Predict"): + llm = OpenAI(model_name=model_name, temperature=temperature, top_p=top_p) # type:ignore + chain = LLMChain(llm=llm, prompt=prompt) + response = chain.run(**dict(zip(columns, inputs))) + label = response.split(":")[1] + st.text(label) + + chain.save("config.yaml") + display_download_button() + st.header("Usage") st.code(CODE) From 5e5c139d8dd46e8840b669b7f0cd77c06251ea2b Mon Sep 17 00:00:00 2001 From: yasufumi Date: Fri, 17 Mar 2023 15:51:29 +0900 Subject: [PATCH 3/5] Add a data frame for the task type Task Free --- doccano_mini/examples.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doccano_mini/examples.py b/doccano_mini/examples.py index 5731c8d..bafecbe 100644 --- a/doccano_mini/examples.py +++ b/doccano_mini/examples.py @@ -14,4 +14,7 @@ def select_example_df(task: TaskType) -> pd.DataFrame: columns=["text", "label"], ) return df + elif task == TaskType.TASK_FREE.value: + df = pd.DataFrame([{"Column 1": "", "Column 2": ""}], columns=["Column 1", "Column 2"]) + return df raise ValueError(f"Task {task} is not supported.") From 40bf21c9dd2f2d7a34636c23951a72fec464dd95 Mon Sep 17 00:00:00 2001 From: yasufumi Date: Fri, 17 Mar 2023 15:51:46 +0900 Subject: [PATCH 4/5] Add make_task_free_prompt method --- doccano_mini/prompts.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/doccano_mini/prompts.py b/doccano_mini/prompts.py index 73babd0..373515f 100644 --- a/doccano_mini/prompts.py +++ b/doccano_mini/prompts.py @@ -24,7 +24,25 @@ def make_classification_prompt(examples: List[dict]) -> FewShotPromptTemplate: return prompt +def make_task_free_prompt(examples: List[dict]) -> FewShotPromptTemplate: + columns = list(examples[0]) + + example_prompt = PromptTemplate( + input_variables=columns, template="\n".join([f"{column}: {{{column}}}" for column in columns]) + ) + + prompt = FewShotPromptTemplate( + examples=examples, + example_prompt=example_prompt, + suffix="\n".join([f"{column}: {{{column}}}" for column in columns[:-1]]), + input_variables=columns[:-1], + ) + return prompt + + def select_prompt_maker(task: TaskType): if task == TaskType.TEXT_CLASSIFICATION.value: return make_classification_prompt + elif task == TaskType.TASK_FREE.value: + return make_task_free_prompt raise ValueError(f"Task {task} is not supported.") From 6c7cbc2ba0777682c6e6b62846d21531e0278786 Mon Sep 17 00:00:00 2001 From: yasufumi Date: Fri, 17 Mar 2023 15:52:10 +0900 Subject: [PATCH 5/5] Refactor app.py --- doccano_mini/app.py | 110 ++++++++++++++++++++------------------------ 1 file changed, 50 insertions(+), 60 deletions(-) diff --git a/doccano_mini/app.py b/doccano_mini/app.py index 8d0161e..299e947 100644 --- a/doccano_mini/app.py +++ b/doccano_mini/app.py @@ -1,9 +1,6 @@ -import pandas as pd import streamlit as st from langchain.chains import LLMChain from langchain.llms import OpenAI -from langchain.prompts.few_shot import FewShotPromptTemplate -from langchain.prompts.prompt import PromptTemplate from doccano_mini.examples import select_example_df from doccano_mini.prompts import select_prompt_maker @@ -15,6 +12,16 @@ chain.run("YOUR TEXT")""" +# https://platform.openai.com/docs/models/gpt-3-5 +AVAILABLE_MODELS = ( + "gpt-3.5-turbo", + "gpt-3.5-turbo-0301", + "text-davinci-003", + "text-davinci-002", + "code-davinci-002", +) + + def display_download_button(): st.header("Download LangChain's config") with open("config.yaml", "r", encoding="utf-8") as f: @@ -43,16 +50,8 @@ def task_classification(task: TaskType): col1, col2 = st.columns([3, 1]) text = col1.text_area(label="Please enter your text.", value="", height=300) - # https://platform.openai.com/docs/models/gpt-3-5 - available_models = ( - "gpt-3.5-turbo", - "gpt-3.5-turbo-0301", - "text-davinci-003", - "text-davinci-002", - "code-davinci-002", - ) # Use text-davinci-003 by default. - model_name = col2.selectbox("Model", available_models, index=2) + model_name = col2.selectbox("Model", AVAILABLE_MODELS, index=2) temperature = col2.slider("Temperature", min_value=0.0, max_value=1.0, value=0.7, step=0.01) top_p = col2.slider("Top-p", min_value=0.0, max_value=1.0, value=1.0, step=0.01) @@ -70,6 +69,43 @@ def task_classification(task: TaskType): display_download_button() +def task_free(task: TaskType): + st.header("Annotate your data") + + num_cols = st.number_input("Set the number of columns", min_value=2, max_value=10) + columns = [st.text_input(f"Column {i}:", value=f"column {i}") for i in range(1, int(num_cols) + 1)] + + df = select_example_df(task) + df = df.reindex(columns, axis="columns", fill_value="") + edited_df = st.experimental_data_editor(df, num_rows="dynamic", width=1000) + examples = edited_df.to_dict(orient="records") + + prompt = select_prompt_maker(task)(examples) + + prompt.prefix = st.text_area( + label="Enter task instruction", + placeholder=f"Predict {columns[-1]} based on {', '.join(columns[:-1])}.", + height=200, + ) + + inputs = {column: st.text_input(f"Input for {column}:") for column in columns[:-1]} + + st.markdown(f"Your prompt\n```\n{prompt.format(**inputs)}\n```") + + # Use text-davinci-003 by default. + model_name = st.selectbox("Model", AVAILABLE_MODELS, index=2) + temperature = st.slider("Temperature", min_value=0.0, max_value=1.0, value=0.7, step=0.01) + top_p = st.slider("Top-p", min_value=0.0, max_value=1.0, value=1.0, step=0.01) + if st.button("Predict"): + llm = OpenAI(model_name=model_name, temperature=temperature, top_p=top_p) # type:ignore + chain = LLMChain(llm=llm, prompt=prompt) + response = chain.run(**inputs) + st.text(response) + + chain.save("config.yaml") + display_download_button() + + def main(): st.set_page_config(page_title="doccano-mini", page_icon=":memo:") @@ -78,56 +114,10 @@ def main(): st.header("Select your task") task = st.selectbox("", options=options, label_visibility="collapsed") - if task == "Text Classification": + if task == TaskType.TEXT_CLASSIFICATION.value: task_classification(task) else: - num_cols = st.number_input("Set the number of columns", min_value=2, max_value=10) - columns = [st.text_input(f"Column {i}:", value=f"column {i}") for i in range(1, num_cols + 1)] - - df = pd.DataFrame({column: [] for column in columns}, dtype=str) - edited_df = st.experimental_data_editor(df, num_rows="dynamic", width=1000) - - example_prompt = PromptTemplate( - input_variables=columns, template="\n".join([f"{column}: {{{column}}}" for column in columns]) - ) - - prefix = st.text_area( - label="Enter task instruction", placeholder="Predict column 2 based on column 1", height=200 - ) - suffix = "\n".join([f"{column}: {{{column}}}" for column in columns[:-1]]) - - prompt = FewShotPromptTemplate( - examples=edited_df.to_dict(orient="records"), - example_prompt=example_prompt, - prefix=prefix, - suffix=suffix, - input_variables=columns[:-1], - ) - - inputs = [st.text_input(f"Input for {columns[i]}:") for i in range(num_cols - 1)] - - st.markdown(f"```\n{prompt.format(**dict(zip(columns, inputs)))}\n```") - - available_models = ( - "gpt-3.5-turbo", - "gpt-3.5-turbo-0301", - "text-davinci-003", - "text-davinci-002", - "code-davinci-002", - ) - # Use text-davinci-003 by default. - model_name = st.selectbox("Model", available_models, index=2) - temperature = st.slider("Temperature", min_value=0.0, max_value=1.0, value=0.7, step=0.01) - top_p = st.slider("Top-p", min_value=0.0, max_value=1.0, value=1.0, step=0.01) - if st.button("Predict"): - llm = OpenAI(model_name=model_name, temperature=temperature, top_p=top_p) # type:ignore - chain = LLMChain(llm=llm, prompt=prompt) - response = chain.run(**dict(zip(columns, inputs))) - label = response.split(":")[1] - st.text(label) - - chain.save("config.yaml") - display_download_button() + task_free(task) st.header("Usage") st.code(CODE)