From 19c4d950761704bda42159c0029cc99c48e83096 Mon Sep 17 00:00:00 2001 From: Amna Mubashar Date: Sun, 8 Dec 2024 23:11:42 +0100 Subject: [PATCH 1/4] Updated tutorial 30 --- tutorials/30_File_Type_Preprocessing_Index_Pipeline.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tutorials/30_File_Type_Preprocessing_Index_Pipeline.ipynb b/tutorials/30_File_Type_Preprocessing_Index_Pipeline.ipynb index 5349636..45fb879 100644 --- a/tutorials/30_File_Type_Preprocessing_Index_Pipeline.ipynb +++ b/tutorials/30_File_Type_Preprocessing_Index_Pipeline.ipynb @@ -320,7 +320,7 @@ "id": "I06qdWsZibSz" }, "source": [ - "Now, let's build a RAG pipeline that answers queries based on the documents you just created in the section above. For this step, we will be using the [`HuggingFaceAPIGenerator`](https://docs.haystack.deepset.ai/docs/huggingfaceapigenerator) so must have a [Hugging Face API Key](https://huggingface.co/settings/tokens) for this section. We will be using the `HuggingFaceH4/zephyr-7b-beta` model." + "Now, let's build a RAG pipeline that answers queries based on the documents you just created in the section above. For this step, we will be using the [`HuggingFaceAPIChatGenerator`](https://docs.haystack.deepset.ai/docs/huggingfaceapichatgenerator) so you must have a [Hugging Face API Key](https://huggingface.co/settings/tokens) for this section. We will be using the `HuggingFaceH4/zephyr-7b-beta` model." 
] }, { From 086a13e0104c10fa1e10d1217c8b2bddc1b191e3 Mon Sep 17 00:00:00 2001 From: Amna Mubashar Date: Sun, 8 Dec 2024 23:19:42 +0100 Subject: [PATCH 2/4] Updated tutorial 32 --- ...le_Type_Preprocessing_Index_Pipeline.ipynb | 148 ++++++++-- ...ng_Documents_and_Queries_by_Language.ipynb | 264 ++++++------------ 2 files changed, 211 insertions(+), 201 deletions(-) diff --git a/tutorials/30_File_Type_Preprocessing_Index_Pipeline.ipynb b/tutorials/30_File_Type_Preprocessing_Index_Pipeline.ipynb index 45fb879..8fc936b 100644 --- a/tutorials/30_File_Type_Preprocessing_Index_Pipeline.ipynb +++ b/tutorials/30_File_Type_Preprocessing_Index_Pipeline.ipynb @@ -91,11 +91,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": { "id": "CkvJIU7FmDf9" }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], "source": [ "from haystack.telemetry import tutorial_running\n", "\n", @@ -117,9 +126,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['recipe_files/vegan_flan_recipe.md',\n", + " 'recipe_files/vegan_keto_eggplant_recipe_fixed.pdf',\n", + " 'recipe_files/vegan_sunflower_hemp_cheese_recipe.txt']" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "import gdown\n", "\n", @@ -180,7 +202,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": { "id": "hCWlpiQCBYOg" }, @@ -201,7 +223,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": { "id": "TVXSX0GHBtdj" }, @@ -222,7 +244,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": { "id": "4yGXKHEXIZxi" }, @@ -251,7 +273,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -260,7 +282,39 @@ "id": "gafXWtNYfNbr", "outputId": "10f351de-ac09-4273-85a2-ac7b59fb2f77" }, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "\n", + "πŸš… Components\n", + " - file_type_router: FileTypeRouter\n", + " - text_file_converter: TextFileToDocument\n", + " - markdown_converter: MarkdownToDocument\n", + " - pypdf_converter: PyPDFToDocument\n", + " - document_joiner: DocumentJoiner\n", + " - document_cleaner: DocumentCleaner\n", + " - document_splitter: DocumentSplitter\n", + " - document_embedder: SentenceTransformersDocumentEmbedder\n", + " - document_writer: DocumentWriter\n", + "πŸ›€οΈ Connections\n", + " - file_type_router.text/plain -> text_file_converter.sources (List[Union[str, Path, 
ByteStream]])\n", + " - file_type_router.application/pdf -> pypdf_converter.sources (List[Union[str, Path, ByteStream]])\n", + " - file_type_router.text/markdown -> markdown_converter.sources (List[Union[str, Path, ByteStream]])\n", + " - text_file_converter.documents -> document_joiner.documents (List[Document])\n", + " - markdown_converter.documents -> document_joiner.documents (List[Document])\n", + " - pypdf_converter.documents -> document_joiner.documents (List[Document])\n", + " - document_joiner.documents -> document_cleaner.documents (List[Document])\n", + " - document_cleaner.documents -> document_splitter.documents (List[Document])\n", + " - document_splitter.documents -> document_embedder.documents (List[Document])\n", + " - document_embedder.documents -> document_writer.documents (List[Document])" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "preprocessing_pipeline.connect(\"file_type_router.text/plain\", \"text_file_converter.sources\")\n", "preprocessing_pipeline.connect(\"file_type_router.application/pdf\", \"pypdf_converter.sources\")\n", @@ -325,7 +379,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -357,7 +411,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -366,14 +420,35 @@ "id": "_s--8xEWq8Y9", "outputId": "1c050d5f-f2ae-4cd3-e0d4-533397a6af63" }, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "\n", + "πŸš… Components\n", + " - embedder: SentenceTransformersTextEmbedder\n", + " - retriever: InMemoryEmbeddingRetriever\n", + " - chat_prompt_builder: ChatPromptBuilder\n", + " - llm: HuggingFaceAPIChatGenerator\n", + "πŸ›€οΈ Connections\n", + " - embedder.embedding -> retriever.query_embedding (List[float])\n", + " - retriever.documents -> 
chat_prompt_builder.documents (List[Document])\n", + " - chat_prompt_builder.prompt -> llm.messages (List[ChatMessage])" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "from haystack.components.embedders import SentenceTransformersTextEmbedder\n", "from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever\n", - "from haystack.components.builders import PromptBuilder\n", - "from haystack.components.generators import HuggingFaceAPIGenerator\n", + "from haystack.components.builders import ChatPromptBuilder\n", + "from haystack.dataclasses import ChatMessage\n", + "from haystack.components.generators.chat import HuggingFaceAPIChatGenerator\n", "\n", - "template = \"\"\"\n", + "template = [ChatMessage.from_user(\"\"\"\n", "Answer the questions based on the given context.\n", "\n", "Context:\n", @@ -383,19 +458,19 @@ "\n", "Question: {{ question }}\n", "Answer:\n", - "\"\"\"\n", + "\"\"\")]\n", "pipe = Pipeline()\n", "pipe.add_component(\"embedder\", SentenceTransformersTextEmbedder(model=\"sentence-transformers/all-MiniLM-L6-v2\"))\n", "pipe.add_component(\"retriever\", InMemoryEmbeddingRetriever(document_store=document_store))\n", - "pipe.add_component(\"prompt_builder\", PromptBuilder(template=template))\n", + "pipe.add_component(\"chat_prompt_builder\", ChatPromptBuilder(template=template))\n", "pipe.add_component(\n", " \"llm\",\n", - " HuggingFaceAPIGenerator(api_type=\"serverless_inference_api\", api_params={\"model\": \"HuggingFaceH4/zephyr-7b-beta\"}),\n", + " HuggingFaceAPIChatGenerator(api_type=\"serverless_inference_api\", api_params={\"model\": \"HuggingFaceH4/zephyr-7b-beta\"}),\n", ")\n", "\n", "pipe.connect(\"embedder.embedding\", \"retriever.query_embedding\")\n", - "pipe.connect(\"retriever\", \"prompt_builder.documents\")\n", - "pipe.connect(\"prompt_builder\", \"llm\")" + "pipe.connect(\"retriever\", \"chat_prompt_builder.documents\")\n", + 
"pipe.connect(\"chat_prompt_builder.prompt\", \"llm.messages\")" ] }, { @@ -409,11 +484,29 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": { "id": "qDqrU5emtBWQ" }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Batches: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 1/1 [00:00<00:00, 3.20it/s]\n" + ] + }, + { + "data": { + "text/plain": [ + "{'llm': {'replies': [ChatMessage(content=\"For vegan keto eggplant lasagna:\\n- 2 large eggplants\\n- Hella salt (optional)\\n- 1/2 cup store-bought vegan mozzarella cheese (for topping)\\n- Pesto:\\n - 4 oz basil (generally one large clamshell or 2 small ones)\\n - 1/4 cup almonds\\n - 1/4 cup nutritional yeast\\n - 1/4 cup olive oil\\n- Spinach tofu ricotta:\\n - 1 recipe spinach tofu ricotta\\n- 1 tsp garlic powder\\n- Juice of half a lemon\\n- Salt to taste\\n\\nFor macadamia nut cheese:\\n- 1 cup macadamia nuts (unsalted and unroasted)\\n- Salt (optional)\\n\\nInstructions:\\n1. Preheat oven to 400Β°F.\\n2. Slice eggplants into 1/4 inch thick slices and rub both sides with salt. Let sit for 20-30 minutes to extract moisture. Rinse with water and pat dry.\\n3. Roast the eggplant in the oven for about 20 minutes or until they're soft and brown in spots, rotating the pans halfway through.\\n4. Reduce oven temperature to 350Β°F.\\n5. In a separate bowl, mix together the store-bought vegan mozzarella cheese (for topping) with spinach tofu ricotta.\\n6. Assemble the lasagna: spread a layer of roasted eggplant at the bottom of the casserole dish, followed by a layer of pesto and a layer of the cheese mixture. Repeat until all ingredients are used, finishing with a layer of roasted eggplant. Sprinkle the remaining store-bought vegan mozzarella cheese (for topping) on top.\\n7. Bake for 25 minutes. 
Optionally, broil for 1-2 minutes at the end to melt the cheese.\\n\\nFor vegan persimmon flan:\\n- 1/2 cup persimmon pulp, strained (about 2 medium persimmons)\\n- 1 tbsp cornstarch\\n- 1/2 tsp agar agar\\n-\", role=, name=None, meta={'model': 'HuggingFaceH4/zephyr-7b-beta', 'finish_reason': 'length', 'index': 0, 'usage': ChatCompletionOutputUsage(completion_tokens=512, prompt_tokens=2276, total_tokens=2788)})]}}" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "question = (\n", " \"What ingredients would I need to make vegan keto eggplant lasagna, vegan persimmon flan, and vegan hemp cheese?\"\n", @@ -422,8 +515,8 @@ "pipe.run(\n", " {\n", " \"embedder\": {\"text\": question},\n", - " \"prompt_builder\": {\"question\": question},\n", - " \"llm\": {\"generation_kwargs\": {\"max_new_tokens\": 350}},\n", + " \"chat_prompt_builder\": {\"question\": question},\n", + " \n", " }\n", ")" ] @@ -434,12 +527,7 @@ "id": "ZJueu_V4KP6w" }, "source": [ - "```python\n", - "{'llm': {'replies': [\"\\n\\nVegan Keto Eggplant Lasagna:\\n\\nIngredients:\\n- 2 large eggplants\\n- A lot of salt (you should have this in your house already)\\n- 1/2 cup store-bought vegan mozzarella (for topping)\\n\\nPesto:\\n- 4 oz basil (generally one large clamshell or 2 small ones)\\n- 1/4 cup almonds\\n- 1/4 cup nutritional yeast\\n- 1/4 cup olive oil\\n- 1 recipe vegan pesto (you can find this in the recipe)\\n- 1 recipe spinach tofu ricotta (you can find this in the recipe)\\n- 1 tsp garlic powder\\n- Juice of half a lemon\\n- Salt to taste\\n\\nSpinach Tofu Ricotta:\\n- 10 oz firm or extra firm tofu\\n- Juice of 1 lemon\\n- Garlic powder to taste\\n- Salt to taste\\n\\nInstructions:\\n1. Slice the eggplants into 1/4 inch thick slices. Some slices will need to be scrapped because it's difficult to get them all uniformly thin. Use them in soup or something, IDK, man.\\n2. Take the eggplant slices and rub both sides with salt. 
Don't be shy about how much, you're gonna rinse it off anyway.\\n3. Put them in a colander with something underneath it and let them sit for half an hour. This draws the water out so that the egg\"],\n", - " 'meta': [{'model': 'HuggingFaceH4/zephyr-7b-beta',\n", - " ...\n", - " }]}}\n", - "```" + "{'llm': {'replies': [ChatMessage(content=\"For vegan keto eggplant lasagna:\\n- 2 large eggplants\\n- Hella salt (optional)\\n- 1/2 cup store-bought vegan mozzarella cheese (for topping)\\n- Pesto:\\n - 4 oz basil (generally one large clamshell or 2 small ones)\\n - 1/4 cup almonds\\n - 1/4 cup nutritional yeast\\n - 1/4 cup olive oil\\n- Spinach tofu ricotta:\\n - 1 recipe spinach tofu ricotta\\n- 1 tsp garlic powder\\n- Juice of half a lemon\\n- Salt to taste\\n\\nFor macadamia nut cheese:\\n- 1 cup macadamia nuts (unsalted and unroasted)\\n- Salt (optional)\\n\\nInstructions:\\n1. Preheat oven to 400Β°F.\\n2. Slice eggplants into 1/4 inch thick slices and rub both sides with salt. Let sit for 20-30 minutes to extract moisture. Rinse with water and pat dry.\\n3. Roast the eggplant in the oven for about 20 minutes or until they're soft and brown in spots, rotating the pans halfway through.\\n4. Reduce oven temperature to 350Β°F.\\n5. In a separate bowl, mix together the store-bought vegan mozzarella cheese (for topping) with spinach tofu ricotta.\\n6. Assemble the lasagna: spread a layer of roasted eggplant at the bottom of the casserole dish, followed by a layer of pesto and a layer of the cheese mixture. Repeat until all ingredients are used, finishing with a layer of roasted eggplant. Sprinkle the remaining store-bought vegan mozzarella cheese (for topping) on top.\\n7. Bake for 25 minutes. 
Optionally, broil for 1-2 minutes at the end to melt the cheese.\\n\\nFor vegan persimmon flan:\\n- 1/2 cup persimmon pulp, strained (about 2 medium persimmons)\\n- 1 tbsp cornstarch\\n- 1/2 tsp agar agar\\n-\", role=, name=None, meta={'model': 'HuggingFaceH4/zephyr-7b-beta', 'finish_reason': 'length', 'index': 0, 'usage': ChatCompletionOutputUsage(completion_tokens=512, prompt_tokens=2276, total_tokens=2788)})]}}" ] }, { diff --git a/tutorials/32_Classifying_Documents_and_Queries_by_Language.ipynb b/tutorials/32_Classifying_Documents_and_Queries_by_Language.ipynb index c4bdb5b..6d745d7 100644 --- a/tutorials/32_Classifying_Documents_and_Queries_by_Language.ipynb +++ b/tutorials/32_Classifying_Documents_and_Queries_by_Language.ipynb @@ -10,7 +10,7 @@ "\n", "- **Level**: Beginner\n", "- **Time to complete**: 15 minutes\n", - "- **Components Used**: [`InMemoryDocumentStore`](https://docs.haystack.deepset.ai/docs/inmemorydocumentstore), [`DocumentLanguageClassifier`](https://docs.haystack.deepset.ai/docs/documentlanguageclassifier), [`MetadataRouter`](https://docs.haystack.deepset.ai/docs/metadatarouter), [`DocumentWriter`](https://docs.haystack.deepset.ai/docs/documentwriter), [`TextLanguageRouter`](https://docs.haystack.deepset.ai/docs/textlanguagerouter), [`DocumentJoiner`](https://docs.haystack.deepset.ai/docs/documentjoiner), [`InMemoryBM25Retriever`](https://docs.haystack.deepset.ai/docs/inmemorybm25retriever), [`PromptBuilder`](https://docs.haystack.deepset.ai/docs/promptbuilder), [`OpenAIGenerator`](https://docs.haystack.deepset.ai/docs/openaigenerator)\n", + "- **Components Used**: [`InMemoryDocumentStore`](https://docs.haystack.deepset.ai/docs/inmemorydocumentstore), [`DocumentLanguageClassifier`](https://docs.haystack.deepset.ai/docs/documentlanguageclassifier), [`MetadataRouter`](https://docs.haystack.deepset.ai/docs/metadatarouter), [`DocumentWriter`](https://docs.haystack.deepset.ai/docs/documentwriter), 
[`TextLanguageRouter`](https://docs.haystack.deepset.ai/docs/textlanguagerouter), [`DocumentJoiner`](https://docs.haystack.deepset.ai/docs/documentjoiner), [`InMemoryBM25Retriever`](https://docs.haystack.deepset.ai/docs/inmemorybm25retriever), [`ChatPromptBuilder`](https://docs.haystack.deepset.ai/docs/chatpromptbuilder), [`OpenAIChatGenerator`](https://docs.haystack.deepset.ai/docs/openaichatgenerator)\n", "- **Goal**: After completing this tutorial, you'll have learned how to build a Haystack pipeline to classify documents based on the (human) language they were written in.\n", "- Optionally, at the end you'll also incorporate language clasification and query routing into a RAG pipeline, so you can query documents based on the language a question was written in.\n", "\n", @@ -57,7 +57,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -65,115 +65,7 @@ "id": "lxgAfuxcdftS", "outputId": "36339d6b-f7a8-4686-911a-60642a8adbe6" }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Collecting haystack-ai\n", - " Using cached haystack_ai-2.5.1-py3-none-any.whl.metadata (13 kB)\n", - "Collecting haystack-experimental (from haystack-ai)\n", - " Using cached haystack_experimental-0.1.1-py3-none-any.whl.metadata (6.9 kB)\n", - "Requirement already satisfied: jinja2 in /Users/tuanacelik/opt/anaconda3/envs/tutorials/lib/python3.12/site-packages (from haystack-ai) (3.1.4)\n", - "Collecting lazy-imports (from haystack-ai)\n", - " Using cached lazy_imports-0.3.1-py3-none-any.whl.metadata (10 kB)\n", - "Collecting more-itertools (from haystack-ai)\n", - " Downloading more_itertools-10.5.0-py3-none-any.whl.metadata (36 kB)\n", - "Collecting networkx (from haystack-ai)\n", - " Using cached networkx-3.3-py3-none-any.whl.metadata (5.1 kB)\n", - "Collecting numpy<2 (from haystack-ai)\n", - " Using cached numpy-1.26.4-cp312-cp312-macosx_10_9_x86_64.whl.metadata 
(61 kB)\n", - "Collecting openai>=1.1.0 (from haystack-ai)\n", - " Downloading openai-1.45.0-py3-none-any.whl.metadata (22 kB)\n", - "Collecting pandas (from haystack-ai)\n", - " Using cached pandas-2.2.2-cp312-cp312-macosx_10_9_x86_64.whl.metadata (19 kB)\n", - "Collecting posthog (from haystack-ai)\n", - " Downloading posthog-3.6.5-py2.py3-none-any.whl.metadata (2.0 kB)\n", - "Requirement already satisfied: python-dateutil in /Users/tuanacelik/opt/anaconda3/envs/tutorials/lib/python3.12/site-packages (from haystack-ai) (2.9.0)\n", - "Requirement already satisfied: pyyaml in /Users/tuanacelik/opt/anaconda3/envs/tutorials/lib/python3.12/site-packages (from haystack-ai) (6.0.1)\n", - "Requirement already satisfied: requests in /Users/tuanacelik/opt/anaconda3/envs/tutorials/lib/python3.12/site-packages (from haystack-ai) (2.32.3)\n", - "Collecting tenacity!=8.4.0 (from haystack-ai)\n", - " Using cached tenacity-9.0.0-py3-none-any.whl.metadata (1.2 kB)\n", - "Collecting tqdm (from haystack-ai)\n", - " Using cached tqdm-4.66.5-py3-none-any.whl.metadata (57 kB)\n", - "Requirement already satisfied: typing-extensions>=4.7 in /Users/tuanacelik/opt/anaconda3/envs/tutorials/lib/python3.12/site-packages (from haystack-ai) (4.12.2)\n", - "Collecting anyio<5,>=3.5.0 (from openai>=1.1.0->haystack-ai)\n", - " Using cached anyio-4.4.0-py3-none-any.whl.metadata (4.6 kB)\n", - "Collecting distro<2,>=1.7.0 (from openai>=1.1.0->haystack-ai)\n", - " Using cached distro-1.9.0-py3-none-any.whl.metadata (6.8 kB)\n", - "Collecting httpx<1,>=0.23.0 (from openai>=1.1.0->haystack-ai)\n", - " Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)\n", - "Collecting jiter<1,>=0.4.0 (from openai>=1.1.0->haystack-ai)\n", - " Using cached jiter-0.5.0-cp312-cp312-macosx_10_12_x86_64.whl.metadata (3.6 kB)\n", - "Collecting pydantic<3,>=1.9.0 (from openai>=1.1.0->haystack-ai)\n", - " Downloading pydantic-2.9.1-py3-none-any.whl.metadata (146 kB)\n", - "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m147.0/147.0 kB\u001b[0m \u001b[31m5.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hCollecting sniffio (from openai>=1.1.0->haystack-ai)\n", - " Using cached sniffio-1.3.1-py3-none-any.whl.metadata (3.9 kB)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /Users/tuanacelik/opt/anaconda3/envs/tutorials/lib/python3.12/site-packages (from jinja2->haystack-ai) (2.1.5)\n", - "Collecting pytz>=2020.1 (from pandas->haystack-ai)\n", - " Downloading pytz-2024.2-py2.py3-none-any.whl.metadata (22 kB)\n", - "Collecting tzdata>=2022.7 (from pandas->haystack-ai)\n", - " Using cached tzdata-2024.1-py2.py3-none-any.whl.metadata (1.4 kB)\n", - "Requirement already satisfied: six>=1.5 in /Users/tuanacelik/opt/anaconda3/envs/tutorials/lib/python3.12/site-packages (from python-dateutil->haystack-ai) (1.16.0)\n", - "Collecting monotonic>=1.5 (from posthog->haystack-ai)\n", - " Using cached monotonic-1.6-py2.py3-none-any.whl.metadata (1.5 kB)\n", - "Collecting backoff>=1.10.0 (from posthog->haystack-ai)\n", - " Using cached backoff-2.2.1-py3-none-any.whl.metadata (14 kB)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/tuanacelik/opt/anaconda3/envs/tutorials/lib/python3.12/site-packages (from requests->haystack-ai) (3.3.2)\n", - "Requirement already satisfied: idna<4,>=2.5 in /Users/tuanacelik/opt/anaconda3/envs/tutorials/lib/python3.12/site-packages (from requests->haystack-ai) (3.7)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/tuanacelik/opt/anaconda3/envs/tutorials/lib/python3.12/site-packages (from requests->haystack-ai) (2.2.2)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /Users/tuanacelik/opt/anaconda3/envs/tutorials/lib/python3.12/site-packages (from requests->haystack-ai) (2024.6.2)\n", - "Collecting httpcore==1.* (from httpx<1,>=0.23.0->openai>=1.1.0->haystack-ai)\n", - " Using cached httpcore-1.0.5-py3-none-any.whl.metadata 
(20 kB)\n", - "Collecting h11<0.15,>=0.13 (from httpcore==1.*->httpx<1,>=0.23.0->openai>=1.1.0->haystack-ai)\n", - " Using cached h11-0.14.0-py3-none-any.whl.metadata (8.2 kB)\n", - "Collecting annotated-types>=0.6.0 (from pydantic<3,>=1.9.0->openai>=1.1.0->haystack-ai)\n", - " Using cached annotated_types-0.7.0-py3-none-any.whl.metadata (15 kB)\n", - "Collecting pydantic-core==2.23.3 (from pydantic<3,>=1.9.0->openai>=1.1.0->haystack-ai)\n", - " Downloading pydantic_core-2.23.3-cp312-cp312-macosx_10_12_x86_64.whl.metadata (6.6 kB)\n", - "Using cached haystack_ai-2.5.1-py3-none-any.whl (351 kB)\n", - "Using cached numpy-1.26.4-cp312-cp312-macosx_10_9_x86_64.whl (20.3 MB)\n", - "Downloading openai-1.45.0-py3-none-any.whl (374 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m374.1/374.1 kB\u001b[0m \u001b[31m12.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", - "\u001b[?25hUsing cached tenacity-9.0.0-py3-none-any.whl (28 kB)\n", - "Using cached tqdm-4.66.5-py3-none-any.whl (78 kB)\n", - "Using cached haystack_experimental-0.1.1-py3-none-any.whl (41 kB)\n", - "Using cached lazy_imports-0.3.1-py3-none-any.whl (12 kB)\n", - "Downloading more_itertools-10.5.0-py3-none-any.whl (60 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m61.0/61.0 kB\u001b[0m \u001b[31m4.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hUsing cached networkx-3.3-py3-none-any.whl (1.7 MB)\n", - "Using cached pandas-2.2.2-cp312-cp312-macosx_10_9_x86_64.whl (12.5 MB)\n", - "Downloading posthog-3.6.5-py2.py3-none-any.whl (54 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m54.2/54.2 kB\u001b[0m \u001b[31m3.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hUsing cached anyio-4.4.0-py3-none-any.whl (86 kB)\n", - "Using cached backoff-2.2.1-py3-none-any.whl (15 kB)\n", - "Using cached distro-1.9.0-py3-none-any.whl (20 
kB)\n", - "Downloading httpx-0.27.2-py3-none-any.whl (76 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m76.4/76.4 kB\u001b[0m \u001b[31m6.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25hUsing cached httpcore-1.0.5-py3-none-any.whl (77 kB)\n", - "Using cached jiter-0.5.0-cp312-cp312-macosx_10_12_x86_64.whl (283 kB)\n", - "Using cached monotonic-1.6-py2.py3-none-any.whl (8.2 kB)\n", - "Downloading pydantic-2.9.1-py3-none-any.whl (434 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m434.4/434.4 kB\u001b[0m \u001b[31m7.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", - "\u001b[?25hDownloading pydantic_core-2.23.3-cp312-cp312-macosx_10_12_x86_64.whl (1.8 MB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.8/1.8 MB\u001b[0m \u001b[31m8.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0mm\n", - "\u001b[?25hDownloading pytz-2024.2-py2.py3-none-any.whl (508 kB)\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m508.0/508.0 kB\u001b[0m \u001b[31m10.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", - "\u001b[?25hUsing cached sniffio-1.3.1-py3-none-any.whl (10 kB)\n", - "Using cached tzdata-2024.1-py2.py3-none-any.whl (345 kB)\n", - "Using cached annotated_types-0.7.0-py3-none-any.whl (13 kB)\n", - "Using cached h11-0.14.0-py3-none-any.whl (58 kB)\n", - "Installing collected packages: pytz, monotonic, tzdata, tqdm, tenacity, sniffio, pydantic-core, numpy, networkx, more-itertools, lazy-imports, jiter, h11, distro, backoff, annotated-types, pydantic, posthog, pandas, httpcore, anyio, httpx, openai, haystack-experimental, haystack-ai\n", - "Successfully installed annotated-types-0.7.0 anyio-4.4.0 backoff-2.2.1 distro-1.9.0 h11-0.14.0 haystack-ai-2.5.1 haystack-experimental-0.1.1 httpcore-1.0.5 httpx-0.27.2 jiter-0.5.0 
lazy-imports-0.3.1 monotonic-1.6 more-itertools-10.5.0 networkx-3.3 numpy-1.26.4 openai-1.45.0 pandas-2.2.2 posthog-3.6.5 pydantic-2.9.1 pydantic-core-2.23.3 pytz-2024.2 sniffio-1.3.1 tenacity-9.0.0 tqdm-4.66.5 tzdata-2024.1\n", - "Collecting langdetect\n", - " Using cached langdetect-1.0.9-py3-none-any.whl\n", - "Requirement already satisfied: six in /Users/tuanacelik/opt/anaconda3/envs/tutorials/lib/python3.12/site-packages (from langdetect) (1.16.0)\n", - "Installing collected packages: langdetect\n", - "Successfully installed langdetect-1.0.9\n" - ] - } - ], + "outputs": [], "source": [ "%%bash\n", "\n", @@ -194,7 +86,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": { "id": "Ubr7yVt6Gbnj" }, @@ -220,7 +112,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": { "id": "mN2fFuWWP_8D" }, @@ -267,7 +159,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "metadata": { "id": "rfC1ZCigQJgI" }, @@ -291,7 +183,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 5, "metadata": { "id": "FlqGdbuxQNKk" }, @@ -306,7 +198,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 6, "metadata": { "id": "FEw5pfmBQRBB" }, @@ -328,11 +220,33 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 7, "metadata": { "id": "BdvO_fEfcVAY" }, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "\n", + "πŸš… Components\n", + " - language_classifier: DocumentLanguageClassifier\n", + " - router: MetadataRouter\n", + " - en_writer: DocumentWriter\n", + " - fr_writer: DocumentWriter\n", + " - es_writer: DocumentWriter\n", + "πŸ›€οΈ Connections\n", + " - language_classifier.documents -> router.documents (List[Document])\n", + " - router.en -> en_writer.documents (List[Document])\n", + " - router.fr -> fr_writer.documents (List[Document])\n", + " - router.es -> es_writer.documents (List[Document])" + ] 
+ }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "indexing_pipeline = Pipeline()\n", "indexing_pipeline.add_component(instance=language_classifier, name=\"language_classifier\")\n", @@ -379,7 +293,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 9, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -391,13 +305,15 @@ { "data": { "text/plain": [ - "{'router': {'unmatched': []},\n", + "{'router': {'unmatched': [Document(id=ea7ea338874232de2d8105a258813f50345db82772e21ad2c4549dbb7adce8a3, content: 'Super appartement. Juste au dessus de plusieurs bars qui ferment trΓ¨s tard. A savoir Γ  l'avance. (Bo...', meta: {'language': 'fr'}),\n", + " Document(id=6b64c8a60543ee32b81cd39bc8d6e09fae4bff1b22c6ccdcf414db26fa354e7a, content: 'Un peu salΓ© surtout le sol. Manque de service et de souplesse', meta: {'language': 'fr'}),\n", + " Document(id=b1be23526f19a8af80a190e775bfd05e65878e585529037cb45b47267a4eaa98, content: 'Nous avons passΓ© un sΓ©jour formidable. Merci aux personnes , le bonjours Γ  Ricardo notre taxi man, t...', meta: {'language': 'fr'})]},\n", " 'en_writer': {'documents_written': 2},\n", - " 'fr_writer': {'documents_written': 3},\n", + " 'fr_writer': {'documents_written': 0},\n", " 'es_writer': {'documents_written': 2}}" ] }, - "execution_count": 13, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -419,7 +335,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 10, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -433,7 +349,7 @@ "output_type": "stream", "text": [ "English documents: [Document(id=8f64ab234c6a5d5652d02bed144d069ec6e988903b071d16fffbf400abfc1047, content: 'The keypad with a code is convenient and the location is convenient. 
Basically everything else, very...', meta: {'language': 'en'}), Document(id=d4d878288efba5e28a43ae0195e43dadd0298fe36d3d9b3075c5c5120d27763e, content: 'It is very central and appartement has a nice appearance (even though a lot IKEA stuff), *W A R N I ...', meta: {'language': 'en'})]\n", - "French documents: [Document(id=ea7ea338874232de2d8105a258813f50345db82772e21ad2c4549dbb7adce8a3, content: 'Super appartement. Juste au dessus de plusieurs bars qui ferment trΓ¨s tard. A savoir Γ  l'avance. (Bo...', meta: {'language': 'fr'}), Document(id=6b64c8a60543ee32b81cd39bc8d6e09fae4bff1b22c6ccdcf414db26fa354e7a, content: 'Un peu salΓ© surtout le sol. Manque de service et de souplesse', meta: {'language': 'fr'}), Document(id=b1be23526f19a8af80a190e775bfd05e65878e585529037cb45b47267a4eaa98, content: 'Nous avons passΓ© un sΓ©jour formidable. Merci aux personnes , le bonjours Γ  Ricardo notre taxi man, t...', meta: {'language': 'fr'})]\n", + "French documents: []\n", "Spanish documents: [Document(id=72b094c163b22a660528bc5adbdf0fecf96b4b4d753c1b117f15dba482d2f948, content: 'El apartamento estaba genial y muy cΓ©ntrico, todo a mano. Al lado de la librerΓ­a Lello y De la Torre...', meta: {'language': 'es'}), Document(id=4b37b8bdfffccfb3211ea167b4fdc5121ca51fc5f869b4f834e8da473f0d3353, content: 'CΓ©ntrico. Muy cΓ³modo para moverse y ver Oporto. Edificio con terraza propia en la ΓΊltima planta. 
Tod...', meta: {'language': 'es'})]\n" ] } @@ -459,7 +375,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 11, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -489,7 +405,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 12, "metadata": { "id": "CN1N2sn1yUVx" }, @@ -497,11 +413,12 @@ "source": [ "from haystack.components.retrievers.in_memory import InMemoryBM25Retriever\n", "from haystack.components.joiners import DocumentJoiner\n", - "from haystack.components.builders import PromptBuilder\n", - "from haystack.components.generators import OpenAIGenerator\n", + "from haystack.components.builders import ChatPromptBuilder\n", + "from haystack.components.generators.chat import OpenAIChatGenerator\n", + "from haystack.dataclasses import ChatMessage\n", "from haystack.components.routers import TextLanguageRouter\n", "\n", - "prompt_template = \"\"\"\n", + "prompt_template = [ChatMessage.from_user(\"\"\"\n", "You will be provided with reviews for an accommodation.\n", "Answer the question concisely based solely on the given reviews.\n", "Reviews:\n", @@ -510,7 +427,7 @@ " {% endfor %}\n", "Question: {{ query}}\n", "Answer:\n", - "\"\"\"" + "\"\"\")]" ] }, { @@ -525,19 +442,47 @@ "- `TextLanguageRouter`\n", "- `InMemoryBM25Retriever`. You'll need a retriever per language, since each language has its own `DocumentStore`.\n", "- `DocumentJoiner`\n", - "- `PromptBuilder`\n", - "- `OpenAIGenerator`\n", + "- `ChatPromptBuilder`\n", + "- `OpenAIChatGenerator`\n", "\n", "> Note: The `BM25Retriever` essentially does keyword matching, which isn't as accurate as other search methods. In order to make the LLM responses more precise, you could refacctor your piplines to use an [`EmbeddingRetriever`](https://docs.haystack.deepset.ai/docs/inmemoryembeddingretriever) which performs vector search over the documents." 
] }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 13, "metadata": { "id": "BN1Hr_BjWKcl" }, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "\n", + "πŸš… Components\n", + " - router: TextLanguageRouter\n", + " - en_retriever: InMemoryBM25Retriever\n", + " - fr_retriever: InMemoryBM25Retriever\n", + " - es_retriever: InMemoryBM25Retriever\n", + " - joiner: DocumentJoiner\n", + " - prompt_builder: ChatPromptBuilder\n", + " - llm: OpenAIChatGenerator\n", + "πŸ›€οΈ Connections\n", + " - router.en -> en_retriever.query (str)\n", + " - router.fr -> fr_retriever.query (str)\n", + " - router.es -> es_retriever.query (str)\n", + " - en_retriever.documents -> joiner.documents (List[Document])\n", + " - fr_retriever.documents -> joiner.documents (List[Document])\n", + " - es_retriever.documents -> joiner.documents (List[Document])\n", + " - joiner.documents -> prompt_builder.documents (List[Document])\n", + " - prompt_builder.prompt -> llm.messages (List[ChatMessage])" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "rag_pipeline = Pipeline()\n", "rag_pipeline.add_component(instance=TextLanguageRouter([\"en\", \"fr\", \"es\"]), name=\"router\")\n", @@ -545,8 +490,8 @@ "rag_pipeline.add_component(instance=InMemoryBM25Retriever(document_store=fr_document_store), name=\"fr_retriever\")\n", "rag_pipeline.add_component(instance=InMemoryBM25Retriever(document_store=es_document_store), name=\"es_retriever\")\n", "rag_pipeline.add_component(instance=DocumentJoiner(), name=\"joiner\")\n", - "rag_pipeline.add_component(instance=PromptBuilder(template=prompt_template), name=\"prompt_builder\")\n", - "rag_pipeline.add_component(instance=OpenAIGenerator(), name=\"llm\")\n", + "rag_pipeline.add_component(instance=ChatPromptBuilder(template=prompt_template), name=\"prompt_builder\")\n", + "rag_pipeline.add_component(instance=OpenAIChatGenerator(), name=\"llm\")\n", "\n", "\n", 
"rag_pipeline.connect(\"router.en\", \"en_retriever.query\")\n", @@ -556,7 +501,7 @@ "rag_pipeline.connect(\"fr_retriever\", \"joiner\")\n", "rag_pipeline.connect(\"es_retriever\", \"joiner\")\n", "rag_pipeline.connect(\"joiner.documents\", \"prompt_builder.documents\")\n", - "rag_pipeline.connect(\"prompt_builder\", \"llm\")" + "rag_pipeline.connect(\"prompt_builder.prompt\", \"llm.messages\")" ] }, { @@ -570,7 +515,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 14, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -595,7 +540,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 15, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -617,22 +562,7 @@ "id": "wj24fjXN0l6v", "outputId": "3c1eed33-c31c-4b72-bcda-fdd64744560b" }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Ranking by BM25...: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 2/2 [00:00<00:00, 3134.76 docs/s]" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n" - ] - } - ], + "outputs": [], "source": [ "en_question = \"Is this apartment conveniently located?\"\n", "\n", @@ -641,7 +571,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 16, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -654,7 +584,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Yes, the apartment is conveniently located.\n" + "ChatMessage(content='Yes, the apartment is conveniently located.', role=, name=None, meta={'model': 'gpt-4o-mini-2024-07-18', 'index': 0, 'finish_reason': 'stop', 'usage': {'completion_tokens': 8, 'prompt_tokens': 365, 'total_tokens': 373, 'prompt_tokens_details': {'cached_tokens': 0, 'audio_tokens': 0}, 'completion_tokens_details': {'reasoning_tokens': 0, 'audio_tokens': 0, 'accepted_prediction_tokens': 0, 'rejected_prediction_tokens': 0}}})\n" ] } ], @@ -673,7 +603,7 @@ }, { "cell_type": "code", - "execution_count": 25, 
+ "execution_count": 17, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -695,15 +625,7 @@ "id": "B4_Be1bs1jxJ", "outputId": "0b96cf29-d633-4c9b-f54c-a785e1c2cbe4" }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Ranking by BM25...: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 2/2 [00:00<00:00, 15887.52 docs/s]\n" - ] - } - ], + "outputs": [], "source": [ "es_question = \"ΒΏEl desayuno es genial?\"\n", "\n", @@ -712,7 +634,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 18, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -725,12 +647,12 @@ "name": "stdout", "output_type": "stream", "text": [ - "No, el desayuno no es genial.\n" + "SΓ­, el desayuno es descrito como estupendo.\n" ] } ], "source": [ - "print(result[\"llm\"][\"replies\"][0])" + "print(result[\"llm\"][\"replies\"][0].content)" ] }, { @@ -773,7 +695,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.4" + "version": "3.9.6" }, "widgets": { "application/vnd.jupyter.widget-state+json": { From d89bba5304ac9f3922c3bb6b356b275f5c64b3f1 Mon Sep 17 00:00:00 2001 From: Amna Mubashar Date: Sun, 8 Dec 2024 23:54:59 +0100 Subject: [PATCH 3/4] updated tutorial 35 --- tutorials/35_Evaluating_RAG_Pipelines.ipynb | 2504 ++++++------------- 1 file changed, 746 insertions(+), 1758 deletions(-) diff --git a/tutorials/35_Evaluating_RAG_Pipelines.ipynb b/tutorials/35_Evaluating_RAG_Pipelines.ipynb index 0f1d860..5ebb551 100644 --- a/tutorials/35_Evaluating_RAG_Pipelines.ipynb +++ b/tutorials/35_Evaluating_RAG_Pipelines.ipynb @@ -10,7 +10,7 @@ "\n", "- **Level**: Intermediate\n", "- **Time to complete**: 15 minutes\n", - "- **Components Used**: `InMemoryDocumentStore`, `InMemoryEmbeddingRetriever`, `PromptBuilder`, `OpenAIGenerator`, `DocumentMRREvaluator`, `FaithfulnessEvaluator`, `SASEvaluator`\n", + "- **Components Used**: `InMemoryDocumentStore`, `InMemoryEmbeddingRetriever`, 
`ChatPromptBuilder`, `OpenAIChatGenerator`, `DocumentMRREvaluator`, `FaithfulnessEvaluator`, `SASEvaluator`\n", "- **Prerequisites**: You must have an API key from an active OpenAI account as this tutorial is using the gpt-4o-mini model by OpenAI: https://platform.openai.com/api-keys\n", "- **Goal**: After completing this tutorial, you'll have learned how to evaluate your RAG pipelines both with model-based, and statistical metrics available in the Haystack evaluation offering. You'll also see which other evaluation frameworks are integrated with Haystack.\n", "\n", @@ -93,128 +93,145 @@ "name": "stdout", "output_type": "stream", "text": [ - "Collecting git+https://github.com/deepset-ai/haystack.git@main\n", - " Cloning https://github.com/deepset-ai/haystack.git (to revision main) to /tmp/pip-req-build-83hiigdl\n", - " Resolved https://github.com/deepset-ai/haystack.git to commit 2509eeea7e82ef52ef65ccce00bfdcc6c1e8c1c2\n", - " Installing build dependencies: started\n", - " Installing build dependencies: finished with status 'done'\n", - " Getting requirements to build wheel: started\n", - " Getting requirements to build wheel: finished with status 'done'\n", - " Preparing metadata (pyproject.toml): started\n", - " Preparing metadata (pyproject.toml): finished with status 'done'\n", - "Collecting boilerpy3 (from haystack-ai==2.1.0rc0)\n", - " Downloading boilerpy3-1.0.7-py3-none-any.whl (22 kB)\n", - "Collecting haystack-bm25 (from haystack-ai==2.1.0rc0)\n", - " Downloading haystack_bm25-1.0.2-py2.py3-none-any.whl (8.8 kB)\n", - "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from haystack-ai==2.1.0rc0) (3.1.3)\n", - "Collecting lazy-imports (from haystack-ai==2.1.0rc0)\n", - " Downloading lazy_imports-0.3.1-py3-none-any.whl (12 kB)\n", - "Requirement already satisfied: more-itertools in /usr/local/lib/python3.10/dist-packages (from haystack-ai==2.1.0rc0) (10.1.0)\n", - "Requirement already satisfied: networkx in 
/usr/local/lib/python3.10/dist-packages (from haystack-ai==2.1.0rc0) (3.3)\n", - "Requirement already satisfied: numpy in /usr/local/lib/python3.10/dist-packages (from haystack-ai==2.1.0rc0) (1.25.2)\n", - "Collecting openai>=1.1.0 (from haystack-ai==2.1.0rc0)\n", - " Downloading openai-1.25.0-py3-none-any.whl (312 kB)\n", - " ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 312.9/312.9 kB 9.8 MB/s eta 0:00:00\n", - "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from haystack-ai==2.1.0rc0) (2.0.3)\n", - "Collecting posthog (from haystack-ai==2.1.0rc0)\n", - " Downloading posthog-3.5.0-py2.py3-none-any.whl (41 kB)\n", - " ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 41.3/41.3 kB 4.4 MB/s eta 0:00:00\n", - "Requirement already satisfied: python-dateutil in /usr/local/lib/python3.10/dist-packages (from haystack-ai==2.1.0rc0) (2.8.2)\n", - "Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from haystack-ai==2.1.0rc0) (6.0.1)\n", - "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from haystack-ai==2.1.0rc0) (2.31.0)\n", - "Requirement already satisfied: tenacity in /usr/local/lib/python3.10/dist-packages (from haystack-ai==2.1.0rc0) (8.2.3)\n", - "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from haystack-ai==2.1.0rc0) (4.66.2)\n", - "Requirement already satisfied: typing-extensions>=4.7 in /usr/local/lib/python3.10/dist-packages (from haystack-ai==2.1.0rc0) (4.11.0)\n", - "Requirement already satisfied: anyio<5,>=3.5.0 in /usr/local/lib/python3.10/dist-packages (from openai>=1.1.0->haystack-ai==2.1.0rc0) (3.7.1)\n", - "Requirement already satisfied: distro<2,>=1.7.0 in /usr/lib/python3/dist-packages (from openai>=1.1.0->haystack-ai==2.1.0rc0) (1.7.0)\n", - "Collecting httpx<1,>=0.23.0 (from openai>=1.1.0->haystack-ai==2.1.0rc0)\n", - " Downloading httpx-0.27.0-py3-none-any.whl (75 kB)\n", - " ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 75.6/75.6 
kB 7.4 MB/s eta 0:00:00\n", - "Requirement already satisfied: pydantic<3,>=1.9.0 in /usr/local/lib/python3.10/dist-packages (from openai>=1.1.0->haystack-ai==2.1.0rc0) (2.7.1)\n", - "Requirement already satisfied: sniffio in /usr/local/lib/python3.10/dist-packages (from openai>=1.1.0->haystack-ai==2.1.0rc0) (1.3.1)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->haystack-ai==2.1.0rc0) (2.1.5)\n", - "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->haystack-ai==2.1.0rc0) (2023.4)\n", - "Requirement already satisfied: tzdata>=2022.1 in /usr/local/lib/python3.10/dist-packages (from pandas->haystack-ai==2.1.0rc0) (2024.1)\n", - "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil->haystack-ai==2.1.0rc0) (1.16.0)\n", - "Collecting monotonic>=1.5 (from posthog->haystack-ai==2.1.0rc0)\n", - " Downloading monotonic-1.6-py2.py3-none-any.whl (8.2 kB)\n", - "Collecting backoff>=1.10.0 (from posthog->haystack-ai==2.1.0rc0)\n", - " Downloading backoff-2.2.1-py3-none-any.whl (15 kB)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->haystack-ai==2.1.0rc0) (3.3.2)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->haystack-ai==2.1.0rc0) (3.7)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->haystack-ai==2.1.0rc0) (2.0.7)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->haystack-ai==2.1.0rc0) (2024.2.2)\n", - "Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio<5,>=3.5.0->openai>=1.1.0->haystack-ai==2.1.0rc0) (1.2.1)\n", - "Collecting httpcore==1.* (from 
httpx<1,>=0.23.0->openai>=1.1.0->haystack-ai==2.1.0rc0)\n", - " Downloading httpcore-1.0.5-py3-none-any.whl (77 kB)\n", - " ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 77.9/77.9 kB 12.3 MB/s eta 0:00:00\n", - "Collecting h11<0.15,>=0.13 (from httpcore==1.*->httpx<1,>=0.23.0->openai>=1.1.0->haystack-ai==2.1.0rc0)\n", - " Downloading h11-0.14.0-py3-none-any.whl (58 kB)\n", - " ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 58.3/58.3 kB 10.2 MB/s eta 0:00:00\n", - "Requirement already satisfied: annotated-types>=0.4.0 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1.9.0->openai>=1.1.0->haystack-ai==2.1.0rc0) (0.6.0)\n", - "Requirement already satisfied: pydantic-core==2.18.2 in /usr/local/lib/python3.10/dist-packages (from pydantic<3,>=1.9.0->openai>=1.1.0->haystack-ai==2.1.0rc0) (2.18.2)\n", - "Building wheels for collected packages: haystack-ai\n", - " Building wheel for haystack-ai (pyproject.toml): started\n", - " Building wheel for haystack-ai (pyproject.toml): finished with status 'done'\n", - " Created wheel for haystack-ai: filename=haystack_ai-2.1.0rc0-py3-none-any.whl size=316211 sha256=aee4b70fda05260e7466d477508440735cfe4d5c3b9a15a7003773a7fa01bd0c\n", - " Stored in directory: /tmp/pip-ephem-wheel-cache-faxhntm2/wheels/23/e0/55/004621325804423c8026b4b5008ddb11f337bf73284d1b9caf\n", - "Successfully built haystack-ai\n", - "Installing collected packages: monotonic, lazy-imports, haystack-bm25, h11, boilerpy3, backoff, posthog, httpcore, httpx, openai, haystack-ai\n", - "Successfully installed backoff-2.2.1 boilerpy3-1.0.7 h11-0.14.0 haystack-ai-2.1.0rc0 haystack-bm25-1.0.2 httpcore-1.0.5 httpx-0.27.0 lazy-imports-0.3.1 monotonic-1.6 openai-1.25.0 posthog-3.5.0\n", - "Collecting datasets>=2.6.1\n", - " Downloading datasets-2.19.0-py3-none-any.whl (542 kB)\n", - " ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 542.0/542.0 kB 9.3 MB/s eta 0:00:00\n", - "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from datasets>=2.6.1) 
(3.13.4)\n", - "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from datasets>=2.6.1) (1.25.2)\n", - "Requirement already satisfied: pyarrow>=12.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets>=2.6.1) (14.0.2)\n", - "Requirement already satisfied: pyarrow-hotfix in /usr/local/lib/python3.10/dist-packages (from datasets>=2.6.1) (0.6)\n", - "Collecting dill<0.3.9,>=0.3.0 (from datasets>=2.6.1)\n", - " Downloading dill-0.3.8-py3-none-any.whl (116 kB)\n", - " ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 116.3/116.3 kB 11.3 MB/s eta 0:00:00\n", - "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from datasets>=2.6.1) (2.0.3)\n", - "Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.10/dist-packages (from datasets>=2.6.1) (2.31.0)\n", - "Requirement already satisfied: tqdm>=4.62.1 in /usr/local/lib/python3.10/dist-packages (from datasets>=2.6.1) (4.66.2)\n", - "Collecting xxhash (from datasets>=2.6.1)\n", - " Downloading xxhash-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n", - " ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 194.1/194.1 kB 12.5 MB/s eta 0:00:00\n", - "Collecting multiprocess (from datasets>=2.6.1)\n", - " Downloading multiprocess-0.70.16-py310-none-any.whl (134 kB)\n", - " ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 134.8/134.8 kB 11.9 MB/s eta 0:00:00\n", - "Requirement already satisfied: fsspec[http]<=2024.3.1,>=2023.1.0 in /usr/local/lib/python3.10/dist-packages (from datasets>=2.6.1) (2023.6.0)\n", - "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets>=2.6.1) (3.9.5)\n", - "Collecting huggingface-hub>=0.21.2 (from datasets>=2.6.1)\n", - " Downloading huggingface_hub-0.22.2-py3-none-any.whl (388 kB)\n", - " ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 388.9/388.9 kB 17.1 MB/s eta 0:00:00\n", - "Requirement already satisfied: packaging in /usr/local/lib/python3.10/dist-packages (from 
datasets>=2.6.1) (24.0)\n", - "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from datasets>=2.6.1) (6.0.1)\n", - "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.6.1) (1.3.1)\n", - "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.6.1) (23.2.0)\n", - "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.6.1) (1.4.1)\n", - "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.6.1) (6.0.5)\n", - "Requirement already satisfied: yarl<2.0,>=1.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.6.1) (1.9.4)\n", - "Requirement already satisfied: async-timeout<5.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.6.1) (4.0.3)\n", - "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.21.2->datasets>=2.6.1) (4.11.0)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets>=2.6.1) (3.3.2)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets>=2.6.1) (3.7)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets>=2.6.1) (2.0.7)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.19.0->datasets>=2.6.1) (2024.2.2)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets>=2.6.1) (2.8.2)\n", - "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from 
pandas->datasets>=2.6.1) (2023.4)\n", - "Requirement already satisfied: tzdata>=2022.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets>=2.6.1) (2024.1)\n", - "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.2->pandas->datasets>=2.6.1) (1.16.0)\n", - "Installing collected packages: xxhash, dill, multiprocess, huggingface-hub, datasets\n", - " Attempting uninstall: huggingface-hub\n", - " Found existing installation: huggingface-hub 0.20.3\n", - " Uninstalling huggingface-hub-0.20.3:\n", - " Successfully uninstalled huggingface-hub-0.20.3\n", - "Successfully installed datasets-2.19.0 dill-0.3.8 huggingface-hub-0.22.2 multiprocess-0.70.16 xxhash-3.4.1\n" + "Defaulting to user installation because normal site-packages is not writeable\n", + "Requirement already satisfied: haystack-ai in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (2.8.0)\n", + "Requirement already satisfied: haystack-experimental in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from haystack-ai) (0.3.0)\n", + "Requirement already satisfied: jinja2 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from haystack-ai) (3.1.4)\n", + "Requirement already satisfied: lazy-imports in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from haystack-ai) (0.3.1)\n", + "Requirement already satisfied: more-itertools in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from haystack-ai) (10.2.0)\n", + "Requirement already satisfied: networkx in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from haystack-ai) (3.2.1)\n", + "Requirement already satisfied: numpy in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from haystack-ai) (1.26.4)\n", + "Requirement already satisfied: openai>=1.1.0 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from haystack-ai) (1.31.1)\n", + "Requirement already 
satisfied: pandas in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from haystack-ai) (2.2.2)\n", + "Requirement already satisfied: posthog in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from haystack-ai) (3.5.0)\n", + "Requirement already satisfied: python-dateutil in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from haystack-ai) (2.9.0.post0)\n", + "Requirement already satisfied: pyyaml in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from haystack-ai) (6.0.1)\n", + "Requirement already satisfied: requests in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from haystack-ai) (2.32.3)\n", + "Requirement already satisfied: tenacity!=8.4.0 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from haystack-ai) (8.3.0)\n", + "Requirement already satisfied: tqdm in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from haystack-ai) (4.66.4)\n", + "Requirement already satisfied: typing-extensions>=4.7 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from haystack-ai) (4.12.1)\n", + "Requirement already satisfied: anyio<5,>=3.5.0 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from openai>=1.1.0->haystack-ai) (4.4.0)\n", + "Requirement already satisfied: distro<2,>=1.7.0 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from openai>=1.1.0->haystack-ai) (1.9.0)\n", + "Requirement already satisfied: httpx<1,>=0.23.0 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from openai>=1.1.0->haystack-ai) (0.27.0)\n", + "Requirement already satisfied: pydantic<3,>=1.9.0 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from openai>=1.1.0->haystack-ai) (2.7.3)\n", + "Requirement already satisfied: sniffio in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from openai>=1.1.0->haystack-ai) (1.3.1)\n", + "Requirement already satisfied: 
MarkupSafe>=2.0 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from jinja2->haystack-ai) (2.1.5)\n", + "Requirement already satisfied: pytz>=2020.1 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from pandas->haystack-ai) (2024.1)\n", + "Requirement already satisfied: tzdata>=2022.7 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from pandas->haystack-ai) (2024.1)\n", + "Requirement already satisfied: six>=1.5 in /Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/site-packages (from python-dateutil->haystack-ai) (1.15.0)\n", + "Requirement already satisfied: monotonic>=1.5 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from posthog->haystack-ai) (1.6)\n", + "Requirement already satisfied: backoff>=1.10.0 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from posthog->haystack-ai) (2.2.1)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from requests->haystack-ai) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from requests->haystack-ai) (3.7)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from requests->haystack-ai) (1.26.18)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from requests->haystack-ai) (2024.6.2)\n", + "Requirement already satisfied: exceptiongroup>=1.0.2 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from anyio<5,>=3.5.0->openai>=1.1.0->haystack-ai) (1.2.1)\n", + "Requirement already satisfied: httpcore==1.* in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from httpx<1,>=0.23.0->openai>=1.1.0->haystack-ai) (1.0.5)\n", + "Requirement already 
satisfied: h11<0.15,>=0.13 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from httpcore==1.*->httpx<1,>=0.23.0->openai>=1.1.0->haystack-ai) (0.14.0)\n", + "Requirement already satisfied: annotated-types>=0.4.0 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from pydantic<3,>=1.9.0->openai>=1.1.0->haystack-ai) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.18.4 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from pydantic<3,>=1.9.0->openai>=1.1.0->haystack-ai) (2.18.4)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - " Running command git clone --filter=blob:none --quiet https://github.com/deepset-ai/haystack.git /tmp/pip-req-build-83hiigdl\n" + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49m/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Defaulting to user installation because normal site-packages is not writeable\n", + "Requirement already satisfied: datasets>=2.6.1 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (3.1.0)\n", + "Requirement already satisfied: filelock in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from datasets>=2.6.1) (3.14.0)\n", + "Requirement already satisfied: numpy>=1.17 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from datasets>=2.6.1) (1.26.4)\n", + "Requirement already satisfied: pyarrow>=15.0.0 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from datasets>=2.6.1) (18.1.0)\n", + "Requirement already satisfied: dill<0.3.9,>=0.3.0 in 
/Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from datasets>=2.6.1) (0.3.8)\n", + "Requirement already satisfied: pandas in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from datasets>=2.6.1) (2.2.2)\n", + "Requirement already satisfied: requests>=2.32.2 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from datasets>=2.6.1) (2.32.3)\n", + "Requirement already satisfied: tqdm>=4.66.3 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from datasets>=2.6.1) (4.66.4)\n", + "Requirement already satisfied: xxhash in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from datasets>=2.6.1) (3.5.0)\n", + "Requirement already satisfied: multiprocess<0.70.17 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from datasets>=2.6.1) (0.70.16)\n", + "Requirement already satisfied: fsspec<=2024.9.0,>=2023.1.0 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets>=2.6.1) (2024.6.0)\n", + "Requirement already satisfied: aiohttp in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from datasets>=2.6.1) (3.11.10)\n", + "Requirement already satisfied: huggingface-hub>=0.23.0 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from datasets>=2.6.1) (0.23.3)\n", + "Requirement already satisfied: packaging in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from datasets>=2.6.1) (24.0)\n", + "Requirement already satisfied: pyyaml>=5.1 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from datasets>=2.6.1) (6.0.1)\n", + "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from aiohttp->datasets>=2.6.1) (2.4.4)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from aiohttp->datasets>=2.6.1) (1.3.1)\n", + 
"Requirement already satisfied: async-timeout<6.0,>=4.0 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from aiohttp->datasets>=2.6.1) (5.0.1)\n", + "Requirement already satisfied: attrs>=17.3.0 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from aiohttp->datasets>=2.6.1) (24.2.0)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from aiohttp->datasets>=2.6.1) (1.5.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from aiohttp->datasets>=2.6.1) (6.1.0)\n", + "Requirement already satisfied: propcache>=0.2.0 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from aiohttp->datasets>=2.6.1) (0.2.1)\n", + "Requirement already satisfied: yarl<2.0,>=1.17.0 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from aiohttp->datasets>=2.6.1) (1.18.3)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from huggingface-hub>=0.23.0->datasets>=2.6.1) (4.12.1)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from requests>=2.32.2->datasets>=2.6.1) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from requests>=2.32.2->datasets>=2.6.1) (3.7)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from requests>=2.32.2->datasets>=2.6.1) (1.26.18)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from requests>=2.32.2->datasets>=2.6.1) (2024.6.2)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in 
/Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from pandas->datasets>=2.6.1) (2.9.0.post0)\n", + "Requirement already satisfied: pytz>=2020.1 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from pandas->datasets>=2.6.1) (2024.1)\n", + "Requirement already satisfied: tzdata>=2022.7 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from pandas->datasets>=2.6.1) (2024.1)\n", + "Requirement already satisfied: six>=1.5 in /Library/Developer/CommandLineTools/Library/Frameworks/Python3.framework/Versions/3.9/lib/python3.9/site-packages (from python-dateutil>=2.8.2->pandas->datasets>=2.6.1) (1.15.0)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49m/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Defaulting to user installation because normal site-packages is not writeable\n", + "Requirement already satisfied: sentence-transformers>=3.0.0 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (3.0.0)\n", + "Requirement already satisfied: transformers<5.0.0,>=4.34.0 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from sentence-transformers>=3.0.0) (4.41.2)\n", + "Requirement already satisfied: tqdm in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from sentence-transformers>=3.0.0) (4.66.4)\n", + "Requirement already satisfied: torch>=1.11.0 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from sentence-transformers>=3.0.0) (2.3.1)\n", + "Requirement 
already satisfied: numpy in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from sentence-transformers>=3.0.0) (1.26.4)\n", + "Requirement already satisfied: scikit-learn in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from sentence-transformers>=3.0.0) (1.5.0)\n", + "Requirement already satisfied: scipy in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from sentence-transformers>=3.0.0) (1.13.1)\n", + "Requirement already satisfied: huggingface-hub>=0.15.1 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from sentence-transformers>=3.0.0) (0.23.3)\n", + "Requirement already satisfied: Pillow in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from sentence-transformers>=3.0.0) (10.3.0)\n", + "Requirement already satisfied: filelock in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from huggingface-hub>=0.15.1->sentence-transformers>=3.0.0) (3.14.0)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from huggingface-hub>=0.15.1->sentence-transformers>=3.0.0) (2024.6.0)\n", + "Requirement already satisfied: packaging>=20.9 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from huggingface-hub>=0.15.1->sentence-transformers>=3.0.0) (24.0)\n", + "Requirement already satisfied: pyyaml>=5.1 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from huggingface-hub>=0.15.1->sentence-transformers>=3.0.0) (6.0.1)\n", + "Requirement already satisfied: requests in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from huggingface-hub>=0.15.1->sentence-transformers>=3.0.0) (2.32.3)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from huggingface-hub>=0.15.1->sentence-transformers>=3.0.0) (4.12.1)\n", + "Requirement already satisfied: sympy in 
/Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from torch>=1.11.0->sentence-transformers>=3.0.0) (1.12.1)\n", + "Requirement already satisfied: networkx in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from torch>=1.11.0->sentence-transformers>=3.0.0) (3.2.1)\n", + "Requirement already satisfied: jinja2 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from torch>=1.11.0->sentence-transformers>=3.0.0) (3.1.4)\n", + "Requirement already satisfied: regex!=2019.12.17 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from transformers<5.0.0,>=4.34.0->sentence-transformers>=3.0.0) (2024.5.15)\n", + "Requirement already satisfied: tokenizers<0.20,>=0.19 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from transformers<5.0.0,>=4.34.0->sentence-transformers>=3.0.0) (0.19.1)\n", + "Requirement already satisfied: safetensors>=0.4.1 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from transformers<5.0.0,>=4.34.0->sentence-transformers>=3.0.0) (0.4.3)\n", + "Requirement already satisfied: joblib>=1.2.0 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from scikit-learn->sentence-transformers>=3.0.0) (1.4.2)\n", + "Requirement already satisfied: threadpoolctl>=3.1.0 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from scikit-learn->sentence-transformers>=3.0.0) (3.5.0)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from jinja2->torch>=1.11.0->sentence-transformers>=3.0.0) (2.1.5)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from requests->huggingface-hub>=0.15.1->sentence-transformers>=3.0.0) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from 
requests->huggingface-hub>=0.15.1->sentence-transformers>=3.0.0) (3.7)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from requests->huggingface-hub>=0.15.1->sentence-transformers>=3.0.0) (1.26.18)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from requests->huggingface-hub>=0.15.1->sentence-transformers>=3.0.0) (2024.6.2)\n", + "Requirement already satisfied: mpmath<1.4.0,>=1.1.0 in /Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages (from sympy->torch>=1.11.0->sentence-transformers>=3.0.0) (1.3.0)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49m/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip\u001b[0m\n" ] } ], @@ -239,11 +256,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": { "id": "A76B4S49O-qa" }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], "source": [ "from haystack.telemetry import tutorial_running\n", "\n", @@ -271,7 +297,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -339,74 +365,12 @@ }, "outputs": [ { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "f4dcb30b5589434f9faa18eb9563c738", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Downloading readme: 0%| | 0.00/498 [00:00\n", + "\n", "πŸš… Components\n", " - query_embedder: SentenceTransformersTextEmbedder\n", " - retriever: InMemoryEmbeddingRetriever\n", - " - prompt_builder: PromptBuilder\n", - " - generator: OpenAIGenerator\n", + " - prompt_builder: ChatPromptBuilder\n", + " - generator: OpenAIChatGenerator\n", " - answer_builder: AnswerBuilder\n", "πŸ›€οΈ Connections\n", " - query_embedder.embedding -> retriever.query_embedding (List[float])\n", " - retriever.documents -> prompt_builder.documents (List[Document])\n", " - retriever.documents -> answer_builder.documents (List[Document])\n", - " - prompt_builder.prompt -> generator.prompt (str)\n", - " - generator.replies -> answer_builder.replies (List[str])\n", - " - generator.meta -> answer_builder.meta (List[Dict[str, Any]])" + " - prompt_builder.prompt -> generator.messages (List[ChatMessage])\n", + " - generator.replies -> answer_builder.replies (List[ChatMessage])" ] }, - "execution_count": 4, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -836,15 +633,16 @@ "source": [ "import os\n", "from getpass import getpass\n", - "from haystack.components.builders import AnswerBuilder, PromptBuilder\n", + "from haystack.components.builders import AnswerBuilder, ChatPromptBuilder\n", + "from haystack.dataclasses import ChatMessage\n", "from haystack.components.embedders import 
SentenceTransformersTextEmbedder\n", - "from haystack.components.generators import OpenAIGenerator\n", + "from haystack.components.generators.chat import OpenAIChatGenerator\n", "from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever\n", "\n", "if \"OPENAI_API_KEY\" not in os.environ:\n", " os.environ[\"OPENAI_API_KEY\"] = getpass(\"Enter OpenAI API key:\")\n", "\n", - "template = \"\"\"\n", + "template = [ChatMessage.from_user(\"\"\"\n", " You have to answer the following question based on the given context information only.\n", "\n", " Context:\n", @@ -854,22 +652,21 @@ "\n", " Question: {{question}}\n", " Answer:\n", - " \"\"\"\n", + " \"\"\")]\n", "\n", "rag_pipeline = Pipeline()\n", "rag_pipeline.add_component(\n", " \"query_embedder\", SentenceTransformersTextEmbedder(model=\"sentence-transformers/all-MiniLM-L6-v2\")\n", ")\n", "rag_pipeline.add_component(\"retriever\", InMemoryEmbeddingRetriever(document_store, top_k=3))\n", - "rag_pipeline.add_component(\"prompt_builder\", PromptBuilder(template=template))\n", - "rag_pipeline.add_component(\"generator\", OpenAIGenerator(model=\"gpt-4o-mini\"))\n", + "rag_pipeline.add_component(\"prompt_builder\", ChatPromptBuilder(template=template))\n", + "rag_pipeline.add_component(\"generator\", OpenAIChatGenerator(model=\"gpt-4o-mini\"))\n", "rag_pipeline.add_component(\"answer_builder\", AnswerBuilder())\n", "\n", "rag_pipeline.connect(\"query_embedder\", \"retriever.query_embedding\")\n", "rag_pipeline.connect(\"retriever\", \"prompt_builder.documents\")\n", - "rag_pipeline.connect(\"prompt_builder\", \"generator\")\n", + "rag_pipeline.connect(\"prompt_builder.prompt\", \"generator.messages\")\n", "rag_pipeline.connect(\"generator.replies\", \"answer_builder.replies\")\n", - "rag_pipeline.connect(\"generator.meta\", \"answer_builder.meta\")\n", "rag_pipeline.connect(\"retriever\", \"answer_builder.documents\")" ] }, @@ -886,7 +683,7 @@ }, { "cell_type": "code", - "execution_count": 5, + 
"execution_count": 9, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -910,24 +707,21 @@ }, "outputs": [ { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "f92db6040c414987acb2e855b5efe29a", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Batches: 0%| | 0/1 [00:00\n", - "
\n", + "
\n", "\n", - "\n", - " \n", - "
\n", - "\n", - "\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - " \n", - "
\n", - "
\n", - " \n" + "" ], "text/plain": [ - " score\n", - "doc_mrr_evaluator 1.000000\n", - "faithfulness 1.000000\n", - "sas_evaluator 0.718074" + " metrics score\n", + "0 doc_mrr_evaluator 1.000000\n", + "1 faithfulness 0.985333\n", + "2 sas_evaluator 0.711842" ] }, - "execution_count": 17, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -2316,7 +1733,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 14, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -2328,15 +1745,8 @@ "outputs": [ { "data": { - "application/vnd.google.colaboratory.intrinsic+json": { - "summary": "{\n \"name\": \"results_df\",\n \"rows\": 25,\n \"fields\": [\n {\n \"column\": \"question\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 25,\n \"samples\": [\n \"Are women using bleach for home cleaning at increased risk of non-allergic asthma?\",\n \"Does puerarin inhibit the inflammatory response in atherosclerosis via modulation of the NF-\\u03baB pathway in a rabbit model?\",\n \"'s it only what you say , it 's also how you say it : communicating nipah virus prevention messages during an outbreak in Bangladesh?\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"contexts\",\n \"properties\": {\n \"dtype\": \"object\",\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 25,\n \"samples\": [\n \"Frequent use of bleach for home-cleaning is associated with non-allergic adult-onset asthma, elevated neutrophil counts and lower-airway symptoms in women.\",\n \"This study indicates that the effect of puerarin on the suppression of atherosclerosis was connected with an inhibited inflammatory response and reduced NF-\\u03baB activation.\",\n \"During outbreaks, one-way behaviour change communication without meaningful causal explanations is unlikely to be effective. 
Based on the cultural context, interactive communication strategies in lay language with supporting evidence can make biomedical prevention messages credible in affected communities, even among those who initially invoke supernatural causal explanations.\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"predicted_answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 25,\n \"samples\": [\n \"Yes, women using bleach for home cleaning are at an increased risk of non-allergic asthma. The study showed that bleach use was significantly associated with non-allergic asthma, particularly non-allergic adult-onset asthma. Women using bleach frequently were more likely to have current asthma compared to non-users, and there were positive associations found between bleach use and bronchial hyperresponsiveness, asthma-like symptoms, and chronic cough among women without allergic sensitization.\",\n \"Yes, puerarin inhibits the inflammatory response in atherosclerosis via modulation of the NF-\\u03baB pathway in a rabbit model. The study found that puerarin reduced the protein and mRNA levels of adhesion molecules (AMs) in the rabbit model of atherosclerosis. It was also noted that the reduced AM levels were due to inhibition of the phosphorylation and degradation of inhibitor-\\u03baB (I-\\u03baB), resulting in reduced p65 NF-\\u03baB nuclear translocation. This indicates that puerarin has a modulatory effect on the NF-\\u03baB pathway, which plays a crucial role in the inflammatory response in atherosclerosis.\",\n \"During the Nipah virus outbreak in Bangladesh, it was not only important to convey prevention messages but also how they were communicated. Field anthropologists played a crucial role in bridging the gap between biomedical explanations and local beliefs about the outbreak. 
Through interactive sessions with residents and using photos to illustrate how the virus could be transmitted, they were able to successfully convey the message. Prior to this intervention, residents believed in supernatural causes and continued risky behaviors like consuming raw date palm sap. However, after the intervention, residents understood the importance of abstaining from such practices and adopting safer behaviors. This shows that the manner in which prevention messages are communicated can greatly impact their effectiveness during an outbreak.\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"doc_mrr_evaluator\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.0,\n \"min\": 1.0,\n \"max\": 1.0,\n \"num_unique_values\": 1,\n \"samples\": [\n 1.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"faithfulness\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.0,\n \"min\": 1.0,\n \"max\": 1.0,\n \"num_unique_values\": 1,\n \"samples\": [\n 1.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"sas_evaluator\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.13475112832644295,\n \"min\": 0.46613821387290955,\n \"max\": 0.9011739492416382,\n \"num_unique_values\": 25,\n \"samples\": [\n 0.8999284505844116\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", - "type": "dataframe", - "variable_name": "results_df" - }, "text/html": [ - "\n", - "
\n", - "
\n", + "
\n", "\n", - "\n", - " \n", - "
\n", - "\n", - "\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - " \n", - "
\n", - "
\n", - "
\n" + "" ], "text/plain": [ " question \\\n", - "0 's it only what you say , it 's also how you s... \n", - "1 Does relieving dyspnoea by non-invasive ventil... \n", - "2 Is patient satisfaction biased by renovations ... \n", - "3 Is cD30 expression a novel prognostic indicato... \n", - "4 Is obesity associated with increased postopera... \n", - "5 Does deep Sequencing the microRNA profile in r... \n", - "6 Is dorsal plication without degloving safe and... \n", - "7 Does mental fatigue affect maximal anaerobic e... \n", - "8 Are women using bleach for home cleaning at in... \n", - "9 Does trichostatin A inhibit Retinal Pigmented ... \n", - "10 Are vitamin D levels and bone turnover markers... \n", - "11 Does alcohol disrupt levels and function of th... \n", - "12 Do genome-wide ancestry patterns in Rapanui su... \n", - "13 Is termination of Nociceptive Bahaviour at the... \n", - "14 Is real-time three-dimensional transesophageal... \n", - "15 Does thalidomide control adipose tissue inflam... \n", - "16 Does puerarin inhibit the inflammatory respons... \n", - "17 Is serum free 1,25-dihydroxy-vitamin D more cl... \n", - "18 Do a critical analysis of secondary overtriage... \n", - "19 Is methylation of the FGFR2 gene associated wi... \n", - "20 Do two decades of British newspaper coverage r... \n", - "21 Are phospholipase C epsilon 1 ( PLCE1 ) haplot... \n", - "22 Are reclassification rates higher among Africa... \n", - "23 Does health indicators associated with fall am... \n", - "24 Do maternal and childhood psychological factor... \n", + "0 Is higher fibrinogen level independently linke... \n", + "1 Are successional changes in the chicken cecal ... \n", + "2 Does [ ITF increase the transcriptional activi... \n", + "3 Is peer-instructed seminar attendance associat... \n", + "4 Are serum TARC levels strongly correlated with... \n", + "5 Does dexmedetomidine reduce atrial fibrillatio... \n", + "6 Does lidocaine potentiate the deleterious effe... 
\n", + "7 Does multicenter immunohistochemical ALK-testi... \n", + "8 Does generalizability of trial result to elder... \n", + "9 Does vascular reconstruction play an important... \n", + "10 Is transient receptor potential ankyrin 1 ( TR... \n", + "11 Is incidence of Type 1 Diabetes Increasing in ... \n", + "12 Does transjugular intrahepatic portosystemic s... \n", + "13 Does plasmid pPCP1-derived sRNA HmsA promote b... \n", + "14 Does trichostatin A inhibit Retinal Pigmented ... \n", + "15 Is endo first appropriate in some patients wit... \n", + "16 Do the effects of ifenprodil on the activity o... \n", + "17 Does improving disease incidence estimate in p... \n", + "18 Does unit support protect against sexual haras... \n", + "19 Does treatment with anti-C5a antibody improve ... \n", + "20 Is impaired renal function associated with rec... \n", + "21 Does melatonin prevent radiation-induced oxida... \n", + "22 Is the ADAMTS13-von Willebrand factor axis inv... \n", + "23 Is repeat endoscopic ultrasound fine needle as... \n", + "24 Does tRAIL receptor deletion in mice suppress ... \n", "\n", " contexts \\\n", - "0 [During a fatal Nipah virus (NiV) outbreak in ... \n", - "1 [Dyspnoea is a threatening sensation of respir... \n", - "2 [Measuring quality of care is essential to imp... \n", - "3 [Extranodal natural killer/T-cell lymphoma, na... \n", - "4 [Obesity has become a significant public healt... \n", - "5 [Rhabdomyosarcoma (RMS) is a highly malignant ... \n", - "6 [To compare the safety and efficacy of patient... \n", - "7 [Mental fatigue can negatively impact on subma... \n", - "8 [Bleach is widely used for household cleaning.... \n", - "9 [Proliferative vitreoretinopathy (PVR) is a bl... \n", - "10 [Morbidly obese patients usually present vitam... \n", - "11 [Excessive consumption of ethanol is one of th... \n", - "12 [Rapa Nui (Easter Island), located in the east... \n", - "13 [Formalin injection induces nociceptive bahavi... 
\n", - "14 [The purpose of this study was to investigate ... \n", - "15 [Immunosuppressant agents modulate the activit... \n", - "16 [The isoflavone puerarin [7-hydroxy-3-(4-hydro... \n", - "17 [Mineral bone disorder (MBD) is prevalent amon... \n", - "18 [Trauma centers often receive transfers from l... \n", - "19 [This study examined links between DNA methyla... \n", - "20 [To review UK newspaper reports relating to Do... \n", - "21 [Phospholipase C epsilon 1 (PLCE1) plays a cru... \n", - "22 [To evaluate the risk of reclassification on s... \n", - "23 [Evidence-based fall prevention programs prima... \n", - "24 [To investigate whether premorbid maternal and... \n", + "0 [Fibrinogen is a coagulation/inflammatory biom... \n", + "1 [Poultry remains a major source of foodborne b... \n", + "2 [To investigate the eff ect of intestinal tref... \n", + "3 [Active engagement in education improves learn... \n", + "4 [This study aims to evaluate the relationship ... \n", + "5 [To evaluate whether the use of intraoperative... \n", + "6 [Local anesthetics are commonly used for the t... \n", + "7 [Detection of anaplastic lymphoma kinase (ALK)... \n", + "8 [In the United States, patients who enroll in ... \n", + "9 [Previous studies have proved the feasibility ... \n", + "10 [Transient receptor potential ankyrin 1 (TRPA1... \n", + "11 [To investigate the recent incidence of T1D in... \n", + "12 [Colorectal resection in cirrhotic patients is... \n", + "13 [The ability of Yersinia pestis to form a biof... \n", + "14 [Proliferative vitreoretinopathy (PVR) is a bl... \n", + "15 [The aims of this study were to determine the ... \n", + "16 [According to reports in the literature, more ... \n", + "17 [In primary care surveillance systems based on... \n", + "18 [Despite concerns about increased sexual haras... \n", + "19 [Patients infected with influenza A(H7N9) viru... \n", + "20 [Atrial fibrillation (AF) and chronic kidney d... 
\n", + "21 [The aim of this study was to analyze the bioc... \n", + "22 [The ADAMTS13-von Willebrand factor (vWF) axis... \n", + "23 [There is no consensus about the ideal method ... \n", + "24 [Low-grade chronic inflammation is a cardinal ... \n", "\n", " answer \\\n", - "0 During outbreaks, one-way behaviour change com... \n", - "1 Relieving dyspnoea by NIV in patients with ALS... \n", - "2 Renovating the interior of a primary care offi... \n", - "3 Our results showed that expression of CD30 was... \n", - "4 Obesity and its resultant medical comorbiditie... \n", - "5 MiR-378a-3p may function as a tumour suppresso... \n", - "6 Penile plication is a safe and effective techn... \n", - "7 Near identical responses in performance and ph... \n", - "8 Frequent use of bleach for home-cleaning is as... \n", - "9 Our findings indicate a role of acetylation in... \n", - "10 Low levels of vitamin D or hyperparathyroidism... \n", - "11 Based on studies of human, mouse, and guinea p... \n", - "12 These genetic results can be explained by one ... \n", - "13 The results of this study suggest the existenc... \n", - "14 RT-3D-TEE provides reliable diagnostic evidenc... \n", - "15 Our results suggest that drugs that can modula... \n", - "16 This study indicates that the effect of puerar... \n", - "17 The relationship between FGF-23 and vitamin D ... \n", - "18 A significant number of patients transferred t... \n", - "19 We identified a novel biologically plausible c... \n", - "20 Regarding DNACPR decision-making, the predomin... \n", - "21 These results suggest that variation in PLCE1 ... \n", - "22 AA men with VLR prostate cancer followed on AS... \n", - "23 Findings have implications for identifying at-... \n", - "24 Pediatricians need to be aware that children w... \n", + "0 Higher fibrinogen level is independently linke... \n", + "1 Over the 42 d experiment, the cecal bacterial ... \n", + "2 ITF increases the transcriptional activity of ... 
\n", + "3 Discussion with well-prepared peers during sem... \n", + "4 Serum TARC levels are well correlated with blo... \n", + "5 These results were similar to those published ... \n", + "6 Our data provide evidence of the detrimental e... \n", + "7 This so-called \"ALK-Harmonization-Study\" shows... \n", + "8 Results of clinical trials for advanced pancre... \n", + "9 An aggressive approach for stage II pancreatic... \n", + "10 The TRPA1 cation channel was found to be funct... \n", + "11 The incidence of T1D has stopped increasing in... \n", + "12 Portal decompression via TIPS placement may en... \n", + "13 HmsA potentially functions as an activator of ... \n", + "14 Our findings indicate a role of acetylation in... \n", + "15 Previous failed EV should be predictive of poo... \n", + "16 The concomitant administration of certain comm... \n", + "17 Post-stratification using external administrat... \n", + "18 A substantial proportion of men and women repo... \n", + "19 Antihuman C5a antibody treatment remarkably re... \n", + "20 Low eGFR at baseline was an independent predic... \n", + "21 It was found that radiotherapy increased oxida... \n", + "22 Our data show that the ADAMTS13-vWF axis is pa... \n", + "23 Repeat EUS-FNA in pancreatic lesions is necess... \n", + "24 These data advance the concept that macrophage... \n", "\n", " predicted_answer doc_mrr_evaluator \\\n", - "0 During the Nipah virus outbreak in Bangladesh,... 1.0 \n", - "1 Yes, relieving dyspnoea by non-invasive ventil... 1.0 \n", - "2 Based on the information provided, patient sat... 1.0 \n", - "3 Based on the provided context information, CD3... 1.0 \n", - "4 Yes, according to the first context provided, ... 1.0 \n", - "5 Yes, deep sequencing of the microRNA profile i... 1.0 \n", - "6 Based on the context information provided, dor... 1.0 \n", - "7 Based on the given context information, it can... 1.0 \n", - "8 Yes, women using bleach for home cleaning are ... 
1.0 \n", - "9 Yes, trichostatin A inhibits Retinal Pigmented... 1.0 \n", - "10 Based on the first context provided, the study... 1.0 \n", - "11 Yes, alcohol disrupts levels and function of t... 1.0 \n", - "12 Yes, genome-wide ancestry patterns in Rapanui ... 1.0 \n", - "13 Yes, termination of nociceptive behavior at th... 1.0 \n", - "14 Yes, real-time three-dimensional transesophage... 1.0 \n", - "15 Yes, thalidomide has been shown to control adi... 1.0 \n", - "16 Yes, puerarin inhibits the inflammatory respon... 1.0 \n", - "17 Yes, according to the information provided in ... 1.0 \n", - "18 Secondary overtriage to a Level I trauma cente... 1.0 \n", - "19 Yes, methylation of the FGFR2 gene is signific... 1.0 \n", - "20 Yes, the two decades of British newspaper cove... 1.0 \n", - "21 Yes, the PLCE1 haplotypes (A2274223C3765524T79... 1.0 \n", - "22 Yes, reclassification rates are higher among A... 1.0 \n", - "23 No, the context information provided focuses o... 1.0 \n", - "24 Yes, maternal and childhood psychological fact... 1.0 \n", + "0 Yes, higher fibrinogen level is independently ... 1.0 \n", + "1 Yes, successional changes in the chicken cecal... 1.0 \n", + "2 Yes, ITF increases the transcriptional activit... 1.0 \n", + "3 Yes, peer-instructed seminar attendance is ass... 1.0 \n", + "4 Yes, serum TARC levels are strongly correlated... 1.0 \n", + "5 No, the study found that the incidence of post... 1.0 \n", + "6 Yes, lidocaine synergistically increases the d... 1.0 \n", + "7 Yes, multicenter immunohistochemical ALK-testi... 1.0 \n", + "8 The generalizability of trial results to elder... 1.0 \n", + "9 Yes, vascular reconstruction plays an importan... 1.0 \n", + "10 Yes, transient receptor potential ankyrin 1 (T... 1.0 \n", + "11 No, there was no significant increase in the i... 1.0 \n", + "12 Yes, transjugular intrahepatic portosystemic s... 1.0 \n", + "13 Yes, plasmid pPCP1-derived sRNA HmsA promotes ... 
1.0 \n", + "14 Yes, trichostatin A (TSA) inhibits Retinal Pig... 1.0 \n", + "15 The phrase \"bridges are burned\" typically impl... 1.0 \n", + "16 Yes, ifenprodil at a non-active dose (10mg/kg)... 1.0 \n", + "17 Yes, improving disease incidence estimates in ... 1.0 \n", + "18 Yes, greater unit support is associated with d... 1.0 \n", + "19 Yes, treatment with the anti-C5a antibody (IFX... 1.0 \n", + "20 Yes, impaired renal function is associated wit... 1.0 \n", + "21 Yes, melatonin prevents radiation-induced oxid... 1.0 \n", + "22 Yes, the ADAMTS13-von Willebrand factor axis i... 1.0 \n", + "23 Yes, repeat endoscopic ultrasound fine needle ... 1.0 \n", + "24 Yes, TRAIL receptor deletion in mice suppresse... 1.0 \n", "\n", " faithfulness sas_evaluator \n", - "0 1.0 0.688929 \n", - "1 1.0 0.811266 \n", - "2 1.0 0.849888 \n", - "3 1.0 0.775011 \n", - "4 1.0 0.845495 \n", - "5 1.0 0.661563 \n", - "6 1.0 0.804615 \n", - "7 1.0 0.849995 \n", - "8 1.0 0.899928 \n", - "9 1.0 0.466138 \n", - "10 1.0 0.747388 \n", - "11 1.0 0.863766 \n", - "12 1.0 0.517162 \n", - "13 1.0 0.901174 \n", - "14 1.0 0.596325 \n", - "15 1.0 0.692221 \n", - "16 1.0 0.894604 \n", - "17 1.0 0.730452 \n", - "18 1.0 0.709596 \n", - "19 1.0 0.490618 \n", - "20 1.0 0.608133 \n", - "21 1.0 0.613439 \n", - "22 1.0 0.573658 \n", - "23 1.0 0.572017 \n", - "24 1.0 0.788478 " + "0 1.000000 0.892444 \n", + "1 1.000000 0.635755 \n", + "2 1.000000 0.898495 \n", + "3 1.000000 0.881314 \n", + "4 1.000000 0.823395 \n", + "5 1.000000 0.772307 \n", + "6 1.000000 0.579498 \n", + "7 1.000000 0.587502 \n", + "8 1.000000 0.736484 \n", + "9 1.000000 0.647735 \n", + "10 1.000000 0.700875 \n", + "11 1.000000 0.703463 \n", + "12 0.833333 0.783859 \n", + "13 1.000000 0.765619 \n", + "14 1.000000 0.492036 \n", + "15 0.800000 0.552325 \n", + "16 1.000000 0.532008 \n", + "17 1.000000 0.584080 \n", + "18 1.000000 0.698656 \n", + "19 1.000000 0.685870 \n", + "20 1.000000 0.760564 \n", + "21 1.000000 0.944898 \n", + "22 1.000000 
0.691734 \n", + "23 1.000000 0.767621 \n", + "24 1.000000 0.677513 " ] }, - "execution_count": 18, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -2984,7 +2186,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 15, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -2996,14 +2198,8 @@ "outputs": [ { "data": { - "application/vnd.google.colaboratory.intrinsic+json": { - "summary": "{\n \"name\": \"pd\",\n \"rows\": 6,\n \"fields\": [\n {\n \"column\": \"question\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 6,\n \"samples\": [\n \"Is termination of Nociceptive Bahaviour at the End of Phase 2 of Formalin Test Attributable to Endogenous Inhibitory Mechanisms , but not by Opioid Receptors Activation?\",\n \"Are women using bleach for home cleaning at increased risk of non-allergic asthma?\",\n \"Do genome-wide ancestry patterns in Rapanui suggest pre-European admixture with Native Americans?\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"contexts\",\n \"properties\": {\n \"dtype\": \"object\",\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 6,\n \"samples\": [\n \"The results of this study suggest the existence of an active inhibitory mechanism, other than the endogenous opioids, that is responsible for termination of nociceptive behaviour at the end of formalin test.\",\n \"Frequent use of bleach for home-cleaning is associated with non-allergic adult-onset asthma, elevated neutrophil counts and lower-airway symptoms in women.\",\n \"These genetic results can be explained by one or more pre-European trans-Pacific contacts.\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"predicted_answer\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 6,\n \"samples\": [\n 
\"Yes, termination of nociceptive behavior at the end of phase 2 of the Formalin test appears to be attributable to endogenous inhibitory mechanisms rather than opioid receptors activation. This is supported by the observation that naloxone, a non-selective antagonist of opioid receptors, decreased nociception in phase 2A but had no effect on the delayed termination of the Formalin test. Additionally, the study specifically investigated active inhibitory mechanisms that lead to termination of nociceptive response in phase II, suggesting that other mechanisms besides opioid receptors may be involved.\",\n \"Yes, women using bleach for home cleaning are at an increased risk of non-allergic asthma. The study showed that bleach use was significantly associated with non-allergic asthma, particularly non-allergic adult-onset asthma. Women using bleach frequently were more likely to have current asthma compared to non-users, and there were positive associations found between bleach use and bronchial hyperresponsiveness, asthma-like symptoms, and chronic cough among women without allergic sensitization.\",\n \"Yes, genome-wide ancestry patterns in Rapanui suggest pre-European admixture with Native Americans, as evidenced by statistical support for Native American admixture dating to AD 1280-1495.\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"doc_mrr_evaluator\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.0,\n \"min\": 1.0,\n \"max\": 1.0,\n \"num_unique_values\": 1,\n \"samples\": [\n 1.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"faithfulness\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.0,\n \"min\": 1.0,\n \"max\": 1.0,\n \"num_unique_values\": 1,\n \"samples\": [\n 1.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"sas_evaluator\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.22366097741438715,\n \"min\": 
0.46613821387290955,\n \"max\": 0.9011739492416382,\n \"num_unique_values\": 6,\n \"samples\": [\n 0.9011739492416382\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}", - "type": "dataframe" - }, "text/html": [ - "\n", - "
\n", - "
\n", + "
\n", "\n", - "\n", - " \n", - "
\n", - "\n", - "\n", - "
\n", - " \n", - "\n", - "\n", - "\n", - " \n", - "
\n", - "
\n", - "
\n" + "" ], "text/plain": [ " question \\\n", - "13 Is termination of Nociceptive Bahaviour at the... \n", - "8 Are women using bleach for home cleaning at in... \n", - "16 Does puerarin inhibit the inflammatory respons... \n", - "9 Does trichostatin A inhibit Retinal Pigmented ... \n", - "19 Is methylation of the FGFR2 gene associated wi... \n", - "12 Do genome-wide ancestry patterns in Rapanui su... \n", + "21 Does melatonin prevent radiation-induced oxida... \n", + "2 Does [ ITF increase the transcriptional activi... \n", + "0 Is higher fibrinogen level independently linke... \n", + "14 Does trichostatin A inhibit Retinal Pigmented ... \n", + "16 Do the effects of ifenprodil on the activity o... \n", + "15 Is endo first appropriate in some patients wit... \n", "\n", " contexts \\\n", - "13 [Formalin injection induces nociceptive bahavi... \n", - "8 [Bleach is widely used for household cleaning.... \n", - "16 [The isoflavone puerarin [7-hydroxy-3-(4-hydro... \n", - "9 [Proliferative vitreoretinopathy (PVR) is a bl... \n", - "19 [This study examined links between DNA methyla... \n", - "12 [Rapa Nui (Easter Island), located in the east... \n", + "21 [The aim of this study was to analyze the bioc... \n", + "2 [To investigate the eff ect of intestinal tref... \n", + "0 [Fibrinogen is a coagulation/inflammatory biom... \n", + "14 [Proliferative vitreoretinopathy (PVR) is a bl... \n", + "16 [According to reports in the literature, more ... \n", + "15 [The aims of this study were to determine the ... \n", "\n", " answer \\\n", - "13 The results of this study suggest the existenc... \n", - "8 Frequent use of bleach for home-cleaning is as... \n", - "16 This study indicates that the effect of puerar... \n", - "9 Our findings indicate a role of acetylation in... \n", - "19 We identified a novel biologically plausible c... \n", - "12 These genetic results can be explained by one ... \n", + "21 It was found that radiotherapy increased oxida... 
\n", + "2 ITF increases the transcriptional activity of ... \n", + "0 Higher fibrinogen level is independently linke... \n", + "14 Our findings indicate a role of acetylation in... \n", + "16 The concomitant administration of certain comm... \n", + "15 Previous failed EV should be predictive of poo... \n", "\n", " predicted_answer doc_mrr_evaluator \\\n", - "13 Yes, termination of nociceptive behavior at th... 1.0 \n", - "8 Yes, women using bleach for home cleaning are ... 1.0 \n", - "16 Yes, puerarin inhibits the inflammatory respon... 1.0 \n", - "9 Yes, trichostatin A inhibits Retinal Pigmented... 1.0 \n", - "19 Yes, methylation of the FGFR2 gene is signific... 1.0 \n", - "12 Yes, genome-wide ancestry patterns in Rapanui ... 1.0 \n", + "21 Yes, melatonin prevents radiation-induced oxid... 1.0 \n", + "2 Yes, ITF increases the transcriptional activit... 1.0 \n", + "0 Yes, higher fibrinogen level is independently ... 1.0 \n", + "14 Yes, trichostatin A (TSA) inhibits Retinal Pig... 1.0 \n", + "16 Yes, ifenprodil at a non-active dose (10mg/kg)... 1.0 \n", + "15 The phrase \"bridges are burned\" typically impl... 
1.0 \n", "\n", " faithfulness sas_evaluator \n", - "13 1.0 0.901174 \n", - "8 1.0 0.899928 \n", - "16 1.0 0.894604 \n", - "9 1.0 0.466138 \n", - "19 1.0 0.490618 \n", - "12 1.0 0.517162 " + "21 1.0 0.944898 \n", + "2 1.0 0.898495 \n", + "0 1.0 0.892444 \n", + "14 1.0 0.492036 \n", + "16 1.0 0.532008 \n", + "15 0.8 0.552325 " ] }, - "execution_count": 19, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -3396,7 +2384,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.1" + "version": "3.9.6" }, "vscode": { "interpreter": { From 7c1caf94992be60bc0fe82d287fe08b33ebe2114 Mon Sep 17 00:00:00 2001 From: Amna Mubashar Date: Fri, 13 Dec 2024 12:16:36 +0100 Subject: [PATCH 4/4] Small fix based on review --- ...ng_Documents_and_Queries_by_Language.ipynb | 43 +++++++++++-------- 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/tutorials/32_Classifying_Documents_and_Queries_by_Language.ipynb b/tutorials/32_Classifying_Documents_and_Queries_by_Language.ipynb index 6d745d7..d82c604 100644 --- a/tutorials/32_Classifying_Documents_and_Queries_by_Language.ipynb +++ b/tutorials/32_Classifying_Documents_and_Queries_by_Language.ipynb @@ -112,11 +112,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": { "id": "mN2fFuWWP_8D" }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages/pypdf/_crypt_providers/_cryptography.py:32: CryptographyDeprecationWarning: ARC4 has been moved to cryptography.hazmat.decrepit.ciphers.algorithms.ARC4 and will be removed from this module in 48.0.0.\n", + " from cryptography.hazmat.primitives.ciphers.algorithms import AES, ARC4\n" + ] + } + ], "source": [ "from haystack import Document, Pipeline\n", "from haystack.document_stores.in_memory import InMemoryDocumentStore\n", @@ -191,7 +200,7 @@ "source": [ 
"language_classifier = DocumentLanguageClassifier(languages=[\"en\", \"fr\", \"es\"])\n", "router_rules = {\"en\": {\"field\": \"meta.language\", \"operator\": \"==\", \"value\": \"en\"}, \n", - " \"fr\": {\"field\": \"meta.language\", \"operator\": \"==\", \"value\": \"fe\"}, \n", + " \"fr\": {\"field\": \"meta.language\", \"operator\": \"==\", \"value\": \"fr\"}, \n", " \"es\": {\"field\": \"meta.language\", \"operator\": \"==\", \"value\": \"es\"}}\n", "router = MetadataRouter(rules=router_rules)" ] @@ -228,7 +237,7 @@ { "data": { "text/plain": [ - "\n", + "\n", "πŸš… Components\n", " - language_classifier: DocumentLanguageClassifier\n", " - router: MetadataRouter\n", @@ -305,11 +314,9 @@ { "data": { "text/plain": [ - "{'router': {'unmatched': [Document(id=ea7ea338874232de2d8105a258813f50345db82772e21ad2c4549dbb7adce8a3, content: 'Super appartement. Juste au dessus de plusieurs bars qui ferment trΓ¨s tard. A savoir Γ  l'avance. (Bo...', meta: {'language': 'fr'}),\n", - " Document(id=6b64c8a60543ee32b81cd39bc8d6e09fae4bff1b22c6ccdcf414db26fa354e7a, content: 'Un peu salΓ© surtout le sol. Manque de service et de souplesse', meta: {'language': 'fr'}),\n", - " Document(id=b1be23526f19a8af80a190e775bfd05e65878e585529037cb45b47267a4eaa98, content: 'Nous avons passΓ© un sΓ©jour formidable. Merci aux personnes , le bonjours Γ  Ricardo notre taxi man, t...', meta: {'language': 'fr'})]},\n", + "{'router': {'unmatched': []},\n", " 'en_writer': {'documents_written': 2},\n", - " 'fr_writer': {'documents_written': 0},\n", + " 'fr_writer': {'documents_written': 3},\n", " 'es_writer': {'documents_written': 2}}" ] }, @@ -349,7 +356,7 @@ "output_type": "stream", "text": [ "English documents: [Document(id=8f64ab234c6a5d5652d02bed144d069ec6e988903b071d16fffbf400abfc1047, content: 'The keypad with a code is convenient and the location is convenient. 
Basically everything else, very...', meta: {'language': 'en'}), Document(id=d4d878288efba5e28a43ae0195e43dadd0298fe36d3d9b3075c5c5120d27763e, content: 'It is very central and appartement has a nice appearance (even though a lot IKEA stuff), *W A R N I ...', meta: {'language': 'en'})]\n", - "French documents: []\n", + "French documents: [Document(id=ea7ea338874232de2d8105a258813f50345db82772e21ad2c4549dbb7adce8a3, content: 'Super appartement. Juste au dessus de plusieurs bars qui ferment très tard. A savoir à l'avance. (Bo...', meta: {'language': 'fr'}), Document(id=6b64c8a60543ee32b81cd39bc8d6e09fae4bff1b22c6ccdcf414db26fa354e7a, content: 'Un peu salé surtout le sol. Manque de service et de souplesse', meta: {'language': 'fr'}), Document(id=b1be23526f19a8af80a190e775bfd05e65878e585529037cb45b47267a4eaa98, content: 'Nous avons passé un séjour formidable. Merci aux personnes , le bonjours à Ricardo notre taxi man, t...', meta: {'language': 'fr'})]\n", "Spanish documents: [Document(id=72b094c163b22a660528bc5adbdf0fecf96b4b4d753c1b117f15dba482d2f948, content: 'El apartamento estaba genial y muy céntrico, todo a mano. Al lado de la librería Lello y De la Torre...', meta: {'language': 'es'}), Document(id=4b37b8bdfffccfb3211ea167b4fdc5121ca51fc5f869b4f834e8da473f0d3353, content: 'Céntrico. Muy cómodo para moverse y ver Oporto. Edificio con terraza propia en la última planta. 
Tod...', meta: {'language': 'es'})]\n" ] } @@ -458,7 +465,7 @@ { "data": { "text/plain": [ - "\n", + "\n", "🚅 Components\n", " - router: TextLanguageRouter\n", " - en_retriever: InMemoryBM25Retriever\n", @@ -540,7 +547,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 21, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -571,7 +578,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 22, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -584,12 +591,12 @@ "name": "stdout", "output_type": "stream", "text": [ - "ChatMessage(content='Yes, the apartment is conveniently located.', role=, name=None, meta={'model': 'gpt-4o-mini-2024-07-18', 'index': 0, 'finish_reason': 'stop', 'usage': {'completion_tokens': 8, 'prompt_tokens': 365, 'total_tokens': 373, 'prompt_tokens_details': {'cached_tokens': 0, 'audio_tokens': 0}, 'completion_tokens_details': {'reasoning_tokens': 0, 'audio_tokens': 0, 'accepted_prediction_tokens': 0, 'rejected_prediction_tokens': 0}}})\n" + "Yes, the apartment is conveniently located.\n" ] } ], "source": [ - "print(result[\"llm\"][\"replies\"][0])" + "print(result[\"llm\"][\"replies\"][0].text)" ] }, { @@ -603,7 +610,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 23, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -634,7 +641,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 24, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -647,12 +654,12 @@ "name": "stdout", "output_type": "stream", "text": [ - "Sí, el desayuno es descrito como estupendo.\n" + "Sí, el desayuno es considerado estupendo.\n" ] } ], "source": [ - "print(result[\"llm\"][\"replies\"][0].content)" + "print(result[\"llm\"][\"replies\"][0].text)" ] }, {