From 086a13e0104c10fa1e10d1217c8b2bddc1b191e3 Mon Sep 17 00:00:00 2001
From: Amna Mubashar <amnahkhan.ak@gmail.com>
Date: Sun, 8 Dec 2024 23:19:42 +0100
Subject: [PATCH] Updated tutorials 30 and 32

---
 ...le_Type_Preprocessing_Index_Pipeline.ipynb | 148 ++++++++--
 ...ng_Documents_and_Queries_by_Language.ipynb | 264 ++++++------------
 2 files changed, 211 insertions(+), 201 deletions(-)

diff --git a/tutorials/30_File_Type_Preprocessing_Index_Pipeline.ipynb b/tutorials/30_File_Type_Preprocessing_Index_Pipeline.ipynb
index 45fb879..8fc936b 100644
--- a/tutorials/30_File_Type_Preprocessing_Index_Pipeline.ipynb
+++ b/tutorials/30_File_Type_Preprocessing_Index_Pipeline.ipynb
@@ -91,11 +91,20 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 2,
    "metadata": {
     "id": "CkvJIU7FmDf9"
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+      "  from .autonotebook import tqdm as notebook_tqdm\n"
+     ]
+    }
+   ],
    "source": [
     "from haystack.telemetry import tutorial_running\n",
     "\n",
@@ -117,9 +126,22 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 3,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['recipe_files/vegan_flan_recipe.md',\n",
+       " 'recipe_files/vegan_keto_eggplant_recipe_fixed.pdf',\n",
+       " 'recipe_files/vegan_sunflower_hemp_cheese_recipe.txt']"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "import gdown\n",
     "\n",
@@ -180,7 +202,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 5,
    "metadata": {
     "id": "hCWlpiQCBYOg"
    },
@@ -201,7 +223,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 6,
    "metadata": {
     "id": "TVXSX0GHBtdj"
    },
@@ -222,7 +244,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 7,
    "metadata": {
     "id": "4yGXKHEXIZxi"
    },
@@ -251,7 +273,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 8,
    "metadata": {
     "colab": {
      "base_uri": "https://localhost:8080/",
@@ -260,7 +282,39 @@
     "id": "gafXWtNYfNbr",
     "outputId": "10f351de-ac09-4273-85a2-ac7b59fb2f77"
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<haystack.core.pipeline.pipeline.Pipeline object at 0x320eb2790>\n",
+       "🚅 Components\n",
+       "  - file_type_router: FileTypeRouter\n",
+       "  - text_file_converter: TextFileToDocument\n",
+       "  - markdown_converter: MarkdownToDocument\n",
+       "  - pypdf_converter: PyPDFToDocument\n",
+       "  - document_joiner: DocumentJoiner\n",
+       "  - document_cleaner: DocumentCleaner\n",
+       "  - document_splitter: DocumentSplitter\n",
+       "  - document_embedder: SentenceTransformersDocumentEmbedder\n",
+       "  - document_writer: DocumentWriter\n",
+       "🛤️ Connections\n",
+       "  - file_type_router.text/plain -> text_file_converter.sources (List[Union[str, Path, ByteStream]])\n",
+       "  - file_type_router.application/pdf -> pypdf_converter.sources (List[Union[str, Path, ByteStream]])\n",
+       "  - file_type_router.text/markdown -> markdown_converter.sources (List[Union[str, Path, ByteStream]])\n",
+       "  - text_file_converter.documents -> document_joiner.documents (List[Document])\n",
+       "  - markdown_converter.documents -> document_joiner.documents (List[Document])\n",
+       "  - pypdf_converter.documents -> document_joiner.documents (List[Document])\n",
+       "  - document_joiner.documents -> document_cleaner.documents (List[Document])\n",
+       "  - document_cleaner.documents -> document_splitter.documents (List[Document])\n",
+       "  - document_splitter.documents -> document_embedder.documents (List[Document])\n",
+       "  - document_embedder.documents -> document_writer.documents (List[Document])"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "preprocessing_pipeline.connect(\"file_type_router.text/plain\", \"text_file_converter.sources\")\n",
     "preprocessing_pipeline.connect(\"file_type_router.application/pdf\", \"pypdf_converter.sources\")\n",
@@ -325,7 +379,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 10,
    "metadata": {
     "colab": {
      "base_uri": "https://localhost:8080/"
@@ -357,7 +411,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 11,
    "metadata": {
     "colab": {
      "base_uri": "https://localhost:8080/",
@@ -366,14 +420,35 @@
     "id": "_s--8xEWq8Y9",
     "outputId": "1c050d5f-f2ae-4cd3-e0d4-533397a6af63"
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<haystack.core.pipeline.pipeline.Pipeline object at 0x3737807f0>\n",
+       "🚅 Components\n",
+       "  - embedder: SentenceTransformersTextEmbedder\n",
+       "  - retriever: InMemoryEmbeddingRetriever\n",
+       "  - chat_prompt_builder: ChatPromptBuilder\n",
+       "  - llm: HuggingFaceAPIChatGenerator\n",
+       "🛤️ Connections\n",
+       "  - embedder.embedding -> retriever.query_embedding (List[float])\n",
+       "  - retriever.documents -> chat_prompt_builder.documents (List[Document])\n",
+       "  - chat_prompt_builder.prompt -> llm.messages (List[ChatMessage])"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "from haystack.components.embedders import SentenceTransformersTextEmbedder\n",
     "from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever\n",
-    "from haystack.components.builders import PromptBuilder\n",
-    "from haystack.components.generators import HuggingFaceAPIGenerator\n",
+    "from haystack.components.builders import ChatPromptBuilder\n",
+    "from haystack.dataclasses import ChatMessage\n",
+    "from haystack.components.generators.chat import HuggingFaceAPIChatGenerator\n",
     "\n",
-    "template = \"\"\"\n",
+    "template = [ChatMessage.from_user(\"\"\"\n",
     "Answer the questions based on the given context.\n",
     "\n",
     "Context:\n",
@@ -383,19 +458,19 @@
     "\n",
     "Question: {{ question }}\n",
     "Answer:\n",
-    "\"\"\"\n",
+    "\"\"\")]\n",
     "pipe = Pipeline()\n",
     "pipe.add_component(\"embedder\", SentenceTransformersTextEmbedder(model=\"sentence-transformers/all-MiniLM-L6-v2\"))\n",
     "pipe.add_component(\"retriever\", InMemoryEmbeddingRetriever(document_store=document_store))\n",
-    "pipe.add_component(\"prompt_builder\", PromptBuilder(template=template))\n",
+    "pipe.add_component(\"chat_prompt_builder\", ChatPromptBuilder(template=template))\n",
     "pipe.add_component(\n",
     "    \"llm\",\n",
-    "    HuggingFaceAPIGenerator(api_type=\"serverless_inference_api\", api_params={\"model\": \"HuggingFaceH4/zephyr-7b-beta\"}),\n",
+    "    HuggingFaceAPIChatGenerator(api_type=\"serverless_inference_api\", api_params={\"model\": \"HuggingFaceH4/zephyr-7b-beta\"}),\n",
     ")\n",
     "\n",
     "pipe.connect(\"embedder.embedding\", \"retriever.query_embedding\")\n",
-    "pipe.connect(\"retriever\", \"prompt_builder.documents\")\n",
-    "pipe.connect(\"prompt_builder\", \"llm\")"
+    "pipe.connect(\"retriever\", \"chat_prompt_builder.documents\")\n",
+    "pipe.connect(\"chat_prompt_builder.prompt\", \"llm.messages\")"
    ]
   },
   {
@@ -409,11 +484,29 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 13,
    "metadata": {
     "id": "qDqrU5emtBWQ"
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Batches: 100%|██████████| 1/1 [00:00<00:00,  3.20it/s]\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "{'llm': {'replies': [ChatMessage(content=\"For vegan keto eggplant lasagna:\\n- 2 large eggplants\\n- Hella salt (optional)\\n- 1/2 cup store-bought vegan mozzarella cheese (for topping)\\n- Pesto:\\n  - 4 oz basil (generally one large clamshell or 2 small ones)\\n  - 1/4 cup almonds\\n  - 1/4 cup nutritional yeast\\n  - 1/4 cup olive oil\\n- Spinach tofu ricotta:\\n  - 1 recipe spinach tofu ricotta\\n- 1 tsp garlic powder\\n- Juice of half a lemon\\n- Salt to taste\\n\\nFor macadamia nut cheese:\\n- 1 cup macadamia nuts (unsalted and unroasted)\\n- Salt (optional)\\n\\nInstructions:\\n1. Preheat oven to 400°F.\\n2. Slice eggplants into 1/4 inch thick slices and rub both sides with salt. Let sit for 20-30 minutes to extract moisture. Rinse with water and pat dry.\\n3. Roast the eggplant in the oven for about 20 minutes or until they're soft and brown in spots, rotating the pans halfway through.\\n4. Reduce oven temperature to 350°F.\\n5. In a separate bowl, mix together the store-bought vegan mozzarella cheese (for topping) with spinach tofu ricotta.\\n6. Assemble the lasagna: spread a layer of roasted eggplant at the bottom of the casserole dish, followed by a layer of pesto and a layer of the cheese mixture. Repeat until all ingredients are used, finishing with a layer of roasted eggplant. Sprinkle the remaining store-bought vegan mozzarella cheese (for topping) on top.\\n7. Bake for 25 minutes. Optionally, broil for 1-2 minutes at the end to melt the cheese.\\n\\nFor vegan persimmon flan:\\n- 1/2 cup persimmon pulp, strained (about 2 medium persimmons)\\n- 1 tbsp cornstarch\\n- 1/2 tsp agar agar\\n-\", role=<ChatRole.ASSISTANT: 'assistant'>, name=None, meta={'model': 'HuggingFaceH4/zephyr-7b-beta', 'finish_reason': 'length', 'index': 0, 'usage': ChatCompletionOutputUsage(completion_tokens=512, prompt_tokens=2276, total_tokens=2788)})]}}"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "question = (\n",
     "    \"What ingredients would I need to make vegan keto eggplant lasagna, vegan persimmon flan, and vegan hemp cheese?\"\n",
@@ -422,8 +515,8 @@
     "pipe.run(\n",
     "    {\n",
     "        \"embedder\": {\"text\": question},\n",
-    "        \"prompt_builder\": {\"question\": question},\n",
-    "        \"llm\": {\"generation_kwargs\": {\"max_new_tokens\": 350}},\n",
+    "        \"chat_prompt_builder\": {\"question\": question},\n",
+    "        \n",
     "    }\n",
     ")"
    ]
@@ -434,12 +527,7 @@
     "id": "ZJueu_V4KP6w"
    },
    "source": [
-    "```python\n",
-    "{'llm': {'replies': [\"\\n\\nVegan Keto Eggplant Lasagna:\\n\\nIngredients:\\n- 2 large eggplants\\n- A lot of salt (you should have this in your house already)\\n- 1/2 cup store-bought vegan mozzarella (for topping)\\n\\nPesto:\\n- 4 oz basil (generally one large clamshell or 2 small ones)\\n- 1/4 cup almonds\\n- 1/4 cup nutritional yeast\\n- 1/4 cup olive oil\\n- 1 recipe vegan pesto (you can find this in the recipe)\\n- 1 recipe spinach tofu ricotta (you can find this in the recipe)\\n- 1 tsp garlic powder\\n- Juice of half a lemon\\n- Salt to taste\\n\\nSpinach Tofu Ricotta:\\n- 10 oz firm or extra firm tofu\\n- Juice of 1 lemon\\n- Garlic powder to taste\\n- Salt to taste\\n\\nInstructions:\\n1. Slice the eggplants into 1/4 inch thick slices. Some slices will need to be scrapped because it's difficult to get them all uniformly thin. Use them in soup or something, IDK, man.\\n2. Take the eggplant slices and rub both sides with salt. Don't be shy about how much, you're gonna rinse it off anyway.\\n3. Put them in a colander with something underneath it and let them sit for half an hour. This draws the water out so that the egg\"],\n",
-    "  'meta': [{'model': 'HuggingFaceH4/zephyr-7b-beta',\n",
-    "    ...\n",
-    "    }]}}\n",
-    "```"
+    "```python\n{'llm': {'replies': [ChatMessage(content=\"For vegan keto eggplant lasagna:\\n- 2 large eggplants\\n- Hella salt (optional)\\n- 1/2 cup store-bought vegan mozzarella cheese (for topping)\\n- Pesto:\\n  - 4 oz basil (generally one large clamshell or 2 small ones)\\n  - 1/4 cup almonds\\n  - 1/4 cup nutritional yeast\\n  - 1/4 cup olive oil\\n- Spinach tofu ricotta:\\n  - 1 recipe spinach tofu ricotta\\n- 1 tsp garlic powder\\n- Juice of half a lemon\\n- Salt to taste\\n\\nFor macadamia nut cheese:\\n- 1 cup macadamia nuts (unsalted and unroasted)\\n- Salt (optional)\\n\\nInstructions:\\n1. Preheat oven to 400°F.\\n2. Slice eggplants into 1/4 inch thick slices and rub both sides with salt. Let sit for 20-30 minutes to extract moisture. Rinse with water and pat dry.\\n3. Roast the eggplant in the oven for about 20 minutes or until they're soft and brown in spots, rotating the pans halfway through.\\n4. Reduce oven temperature to 350°F.\\n5. In a separate bowl, mix together the store-bought vegan mozzarella cheese (for topping) with spinach tofu ricotta.\\n6. Assemble the lasagna: spread a layer of roasted eggplant at the bottom of the casserole dish, followed by a layer of pesto and a layer of the cheese mixture. Repeat until all ingredients are used, finishing with a layer of roasted eggplant. Sprinkle the remaining store-bought vegan mozzarella cheese (for topping) on top.\\n7. Bake for 25 minutes. Optionally, broil for 1-2 minutes at the end to melt the cheese.\\n\\nFor vegan persimmon flan:\\n- 1/2 cup persimmon pulp, strained (about 2 medium persimmons)\\n- 1 tbsp cornstarch\\n- 1/2 tsp agar agar\\n-\", role=<ChatRole.ASSISTANT: 'assistant'>, name=None, meta={'model': 'HuggingFaceH4/zephyr-7b-beta', 'finish_reason': 'length', 'index': 0, 'usage': ChatCompletionOutputUsage(completion_tokens=512, prompt_tokens=2276, total_tokens=2788)})]}}\n```"
    ]
   },
   {
diff --git a/tutorials/32_Classifying_Documents_and_Queries_by_Language.ipynb b/tutorials/32_Classifying_Documents_and_Queries_by_Language.ipynb
index c4bdb5b..6d745d7 100644
--- a/tutorials/32_Classifying_Documents_and_Queries_by_Language.ipynb
+++ b/tutorials/32_Classifying_Documents_and_Queries_by_Language.ipynb
@@ -10,7 +10,7 @@
     "\n",
     "- **Level**: Beginner\n",
     "- **Time to complete**: 15 minutes\n",
-    "- **Components Used**: [`InMemoryDocumentStore`](https://docs.haystack.deepset.ai/docs/inmemorydocumentstore), [`DocumentLanguageClassifier`](https://docs.haystack.deepset.ai/docs/documentlanguageclassifier), [`MetadataRouter`](https://docs.haystack.deepset.ai/docs/metadatarouter), [`DocumentWriter`](https://docs.haystack.deepset.ai/docs/documentwriter), [`TextLanguageRouter`](https://docs.haystack.deepset.ai/docs/textlanguagerouter), [`DocumentJoiner`](https://docs.haystack.deepset.ai/docs/documentjoiner), [`InMemoryBM25Retriever`](https://docs.haystack.deepset.ai/docs/inmemorybm25retriever), [`PromptBuilder`](https://docs.haystack.deepset.ai/docs/promptbuilder), [`OpenAIGenerator`](https://docs.haystack.deepset.ai/docs/openaigenerator)\n",
+    "- **Components Used**: [`InMemoryDocumentStore`](https://docs.haystack.deepset.ai/docs/inmemorydocumentstore), [`DocumentLanguageClassifier`](https://docs.haystack.deepset.ai/docs/documentlanguageclassifier), [`MetadataRouter`](https://docs.haystack.deepset.ai/docs/metadatarouter), [`DocumentWriter`](https://docs.haystack.deepset.ai/docs/documentwriter), [`TextLanguageRouter`](https://docs.haystack.deepset.ai/docs/textlanguagerouter), [`DocumentJoiner`](https://docs.haystack.deepset.ai/docs/documentjoiner), [`InMemoryBM25Retriever`](https://docs.haystack.deepset.ai/docs/inmemorybm25retriever), [`ChatPromptBuilder`](https://docs.haystack.deepset.ai/docs/chatpromptbuilder), [`OpenAIChatGenerator`](https://docs.haystack.deepset.ai/docs/openaichatgenerator)\n",
     "- **Goal**: After completing this tutorial, you'll have learned how to build a Haystack pipeline to classify documents based on the (human) language they were written in.\n",
     "- Optionally, at the end you'll also incorporate language clasification and query routing into a RAG pipeline, so you can query documents based on the language a question was written in.\n",
     "\n",
@@ -57,7 +57,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "metadata": {
     "colab": {
      "base_uri": "https://localhost:8080/"
@@ -65,115 +65,7 @@
     "id": "lxgAfuxcdftS",
     "outputId": "36339d6b-f7a8-4686-911a-60642a8adbe6"
    },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Collecting haystack-ai\n",
-      "  Using cached haystack_ai-2.5.1-py3-none-any.whl.metadata (13 kB)\n",
-      "Collecting haystack-experimental (from haystack-ai)\n",
-      "  Using cached haystack_experimental-0.1.1-py3-none-any.whl.metadata (6.9 kB)\n",
-      "Requirement already satisfied: jinja2 in /Users/tuanacelik/opt/anaconda3/envs/tutorials/lib/python3.12/site-packages (from haystack-ai) (3.1.4)\n",
-      "Collecting lazy-imports (from haystack-ai)\n",
-      "  Using cached lazy_imports-0.3.1-py3-none-any.whl.metadata (10 kB)\n",
-      "Collecting more-itertools (from haystack-ai)\n",
-      "  Downloading more_itertools-10.5.0-py3-none-any.whl.metadata (36 kB)\n",
-      "Collecting networkx (from haystack-ai)\n",
-      "  Using cached networkx-3.3-py3-none-any.whl.metadata (5.1 kB)\n",
-      "Collecting numpy<2 (from haystack-ai)\n",
-      "  Using cached numpy-1.26.4-cp312-cp312-macosx_10_9_x86_64.whl.metadata (61 kB)\n",
-      "Collecting openai>=1.1.0 (from haystack-ai)\n",
-      "  Downloading openai-1.45.0-py3-none-any.whl.metadata (22 kB)\n",
-      "Collecting pandas (from haystack-ai)\n",
-      "  Using cached pandas-2.2.2-cp312-cp312-macosx_10_9_x86_64.whl.metadata (19 kB)\n",
-      "Collecting posthog (from haystack-ai)\n",
-      "  Downloading posthog-3.6.5-py2.py3-none-any.whl.metadata (2.0 kB)\n",
-      "Requirement already satisfied: python-dateutil in /Users/tuanacelik/opt/anaconda3/envs/tutorials/lib/python3.12/site-packages (from haystack-ai) (2.9.0)\n",
-      "Requirement already satisfied: pyyaml in /Users/tuanacelik/opt/anaconda3/envs/tutorials/lib/python3.12/site-packages (from haystack-ai) (6.0.1)\n",
-      "Requirement already satisfied: requests in /Users/tuanacelik/opt/anaconda3/envs/tutorials/lib/python3.12/site-packages (from haystack-ai) (2.32.3)\n",
-      "Collecting tenacity!=8.4.0 (from haystack-ai)\n",
-      "  Using cached tenacity-9.0.0-py3-none-any.whl.metadata (1.2 kB)\n",
-      "Collecting tqdm (from haystack-ai)\n",
-      "  Using cached tqdm-4.66.5-py3-none-any.whl.metadata (57 kB)\n",
-      "Requirement already satisfied: typing-extensions>=4.7 in /Users/tuanacelik/opt/anaconda3/envs/tutorials/lib/python3.12/site-packages (from haystack-ai) (4.12.2)\n",
-      "Collecting anyio<5,>=3.5.0 (from openai>=1.1.0->haystack-ai)\n",
-      "  Using cached anyio-4.4.0-py3-none-any.whl.metadata (4.6 kB)\n",
-      "Collecting distro<2,>=1.7.0 (from openai>=1.1.0->haystack-ai)\n",
-      "  Using cached distro-1.9.0-py3-none-any.whl.metadata (6.8 kB)\n",
-      "Collecting httpx<1,>=0.23.0 (from openai>=1.1.0->haystack-ai)\n",
-      "  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)\n",
-      "Collecting jiter<1,>=0.4.0 (from openai>=1.1.0->haystack-ai)\n",
-      "  Using cached jiter-0.5.0-cp312-cp312-macosx_10_12_x86_64.whl.metadata (3.6 kB)\n",
-      "Collecting pydantic<3,>=1.9.0 (from openai>=1.1.0->haystack-ai)\n",
-      "  Downloading pydantic-2.9.1-py3-none-any.whl.metadata (146 kB)\n",
-      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m147.0/147.0 kB\u001b[0m \u001b[31m5.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-      "\u001b[?25hCollecting sniffio (from openai>=1.1.0->haystack-ai)\n",
-      "  Using cached sniffio-1.3.1-py3-none-any.whl.metadata (3.9 kB)\n",
-      "Requirement already satisfied: MarkupSafe>=2.0 in /Users/tuanacelik/opt/anaconda3/envs/tutorials/lib/python3.12/site-packages (from jinja2->haystack-ai) (2.1.5)\n",
-      "Collecting pytz>=2020.1 (from pandas->haystack-ai)\n",
-      "  Downloading pytz-2024.2-py2.py3-none-any.whl.metadata (22 kB)\n",
-      "Collecting tzdata>=2022.7 (from pandas->haystack-ai)\n",
-      "  Using cached tzdata-2024.1-py2.py3-none-any.whl.metadata (1.4 kB)\n",
-      "Requirement already satisfied: six>=1.5 in /Users/tuanacelik/opt/anaconda3/envs/tutorials/lib/python3.12/site-packages (from python-dateutil->haystack-ai) (1.16.0)\n",
-      "Collecting monotonic>=1.5 (from posthog->haystack-ai)\n",
-      "  Using cached monotonic-1.6-py2.py3-none-any.whl.metadata (1.5 kB)\n",
-      "Collecting backoff>=1.10.0 (from posthog->haystack-ai)\n",
-      "  Using cached backoff-2.2.1-py3-none-any.whl.metadata (14 kB)\n",
-      "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/tuanacelik/opt/anaconda3/envs/tutorials/lib/python3.12/site-packages (from requests->haystack-ai) (3.3.2)\n",
-      "Requirement already satisfied: idna<4,>=2.5 in /Users/tuanacelik/opt/anaconda3/envs/tutorials/lib/python3.12/site-packages (from requests->haystack-ai) (3.7)\n",
-      "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/tuanacelik/opt/anaconda3/envs/tutorials/lib/python3.12/site-packages (from requests->haystack-ai) (2.2.2)\n",
-      "Requirement already satisfied: certifi>=2017.4.17 in /Users/tuanacelik/opt/anaconda3/envs/tutorials/lib/python3.12/site-packages (from requests->haystack-ai) (2024.6.2)\n",
-      "Collecting httpcore==1.* (from httpx<1,>=0.23.0->openai>=1.1.0->haystack-ai)\n",
-      "  Using cached httpcore-1.0.5-py3-none-any.whl.metadata (20 kB)\n",
-      "Collecting h11<0.15,>=0.13 (from httpcore==1.*->httpx<1,>=0.23.0->openai>=1.1.0->haystack-ai)\n",
-      "  Using cached h11-0.14.0-py3-none-any.whl.metadata (8.2 kB)\n",
-      "Collecting annotated-types>=0.6.0 (from pydantic<3,>=1.9.0->openai>=1.1.0->haystack-ai)\n",
-      "  Using cached annotated_types-0.7.0-py3-none-any.whl.metadata (15 kB)\n",
-      "Collecting pydantic-core==2.23.3 (from pydantic<3,>=1.9.0->openai>=1.1.0->haystack-ai)\n",
-      "  Downloading pydantic_core-2.23.3-cp312-cp312-macosx_10_12_x86_64.whl.metadata (6.6 kB)\n",
-      "Using cached haystack_ai-2.5.1-py3-none-any.whl (351 kB)\n",
-      "Using cached numpy-1.26.4-cp312-cp312-macosx_10_9_x86_64.whl (20.3 MB)\n",
-      "Downloading openai-1.45.0-py3-none-any.whl (374 kB)\n",
-      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m374.1/374.1 kB\u001b[0m \u001b[31m12.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
-      "\u001b[?25hUsing cached tenacity-9.0.0-py3-none-any.whl (28 kB)\n",
-      "Using cached tqdm-4.66.5-py3-none-any.whl (78 kB)\n",
-      "Using cached haystack_experimental-0.1.1-py3-none-any.whl (41 kB)\n",
-      "Using cached lazy_imports-0.3.1-py3-none-any.whl (12 kB)\n",
-      "Downloading more_itertools-10.5.0-py3-none-any.whl (60 kB)\n",
-      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m61.0/61.0 kB\u001b[0m \u001b[31m4.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-      "\u001b[?25hUsing cached networkx-3.3-py3-none-any.whl (1.7 MB)\n",
-      "Using cached pandas-2.2.2-cp312-cp312-macosx_10_9_x86_64.whl (12.5 MB)\n",
-      "Downloading posthog-3.6.5-py2.py3-none-any.whl (54 kB)\n",
-      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m54.2/54.2 kB\u001b[0m \u001b[31m3.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-      "\u001b[?25hUsing cached anyio-4.4.0-py3-none-any.whl (86 kB)\n",
-      "Using cached backoff-2.2.1-py3-none-any.whl (15 kB)\n",
-      "Using cached distro-1.9.0-py3-none-any.whl (20 kB)\n",
-      "Downloading httpx-0.27.2-py3-none-any.whl (76 kB)\n",
-      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m76.4/76.4 kB\u001b[0m \u001b[31m6.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-      "\u001b[?25hUsing cached httpcore-1.0.5-py3-none-any.whl (77 kB)\n",
-      "Using cached jiter-0.5.0-cp312-cp312-macosx_10_12_x86_64.whl (283 kB)\n",
-      "Using cached monotonic-1.6-py2.py3-none-any.whl (8.2 kB)\n",
-      "Downloading pydantic-2.9.1-py3-none-any.whl (434 kB)\n",
-      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m434.4/434.4 kB\u001b[0m \u001b[31m7.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
-      "\u001b[?25hDownloading pydantic_core-2.23.3-cp312-cp312-macosx_10_12_x86_64.whl (1.8 MB)\n",
-      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.8/1.8 MB\u001b[0m \u001b[31m8.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0mm\n",
-      "\u001b[?25hDownloading pytz-2024.2-py2.py3-none-any.whl (508 kB)\n",
-      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m508.0/508.0 kB\u001b[0m \u001b[31m10.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n",
-      "\u001b[?25hUsing cached sniffio-1.3.1-py3-none-any.whl (10 kB)\n",
-      "Using cached tzdata-2024.1-py2.py3-none-any.whl (345 kB)\n",
-      "Using cached annotated_types-0.7.0-py3-none-any.whl (13 kB)\n",
-      "Using cached h11-0.14.0-py3-none-any.whl (58 kB)\n",
-      "Installing collected packages: pytz, monotonic, tzdata, tqdm, tenacity, sniffio, pydantic-core, numpy, networkx, more-itertools, lazy-imports, jiter, h11, distro, backoff, annotated-types, pydantic, posthog, pandas, httpcore, anyio, httpx, openai, haystack-experimental, haystack-ai\n",
-      "Successfully installed annotated-types-0.7.0 anyio-4.4.0 backoff-2.2.1 distro-1.9.0 h11-0.14.0 haystack-ai-2.5.1 haystack-experimental-0.1.1 httpcore-1.0.5 httpx-0.27.2 jiter-0.5.0 lazy-imports-0.3.1 monotonic-1.6 more-itertools-10.5.0 networkx-3.3 numpy-1.26.4 openai-1.45.0 pandas-2.2.2 posthog-3.6.5 pydantic-2.9.1 pydantic-core-2.23.3 pytz-2024.2 sniffio-1.3.1 tenacity-9.0.0 tqdm-4.66.5 tzdata-2024.1\n",
-      "Collecting langdetect\n",
-      "  Using cached langdetect-1.0.9-py3-none-any.whl\n",
-      "Requirement already satisfied: six in /Users/tuanacelik/opt/anaconda3/envs/tutorials/lib/python3.12/site-packages (from langdetect) (1.16.0)\n",
-      "Installing collected packages: langdetect\n",
-      "Successfully installed langdetect-1.0.9\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "%%bash\n",
     "\n",
@@ -194,7 +86,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "metadata": {
     "id": "Ubr7yVt6Gbnj"
    },
@@ -220,7 +112,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "metadata": {
     "id": "mN2fFuWWP_8D"
    },
@@ -267,7 +159,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 4,
    "metadata": {
     "id": "rfC1ZCigQJgI"
    },
@@ -291,7 +183,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 5,
    "metadata": {
     "id": "FlqGdbuxQNKk"
    },
@@ -306,7 +198,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 6,
    "metadata": {
     "id": "FEw5pfmBQRBB"
    },
@@ -328,11 +220,33 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 7,
    "metadata": {
     "id": "BdvO_fEfcVAY"
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<haystack.core.pipeline.pipeline.Pipeline object at 0x3086b9700>\n",
+       "🚅 Components\n",
+       "  - language_classifier: DocumentLanguageClassifier\n",
+       "  - router: MetadataRouter\n",
+       "  - en_writer: DocumentWriter\n",
+       "  - fr_writer: DocumentWriter\n",
+       "  - es_writer: DocumentWriter\n",
+       "🛤️ Connections\n",
+       "  - language_classifier.documents -> router.documents (List[Document])\n",
+       "  - router.en -> en_writer.documents (List[Document])\n",
+       "  - router.fr -> fr_writer.documents (List[Document])\n",
+       "  - router.es -> es_writer.documents (List[Document])"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "indexing_pipeline = Pipeline()\n",
     "indexing_pipeline.add_component(instance=language_classifier, name=\"language_classifier\")\n",
@@ -379,7 +293,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 9,
    "metadata": {
     "colab": {
      "base_uri": "https://localhost:8080/"
@@ -391,13 +305,15 @@
     {
      "data": {
       "text/plain": [
-       "{'router': {'unmatched': []},\n",
+       "{'router': {'unmatched': [Document(id=ea7ea338874232de2d8105a258813f50345db82772e21ad2c4549dbb7adce8a3, content: 'Super appartement. Juste au dessus de plusieurs bars qui ferment très tard. A savoir à l'avance. (Bo...', meta: {'language': 'fr'}),\n",
+       "   Document(id=6b64c8a60543ee32b81cd39bc8d6e09fae4bff1b22c6ccdcf414db26fa354e7a, content: 'Un peu salé surtout le sol. Manque de service et de souplesse', meta: {'language': 'fr'}),\n",
+       "   Document(id=b1be23526f19a8af80a190e775bfd05e65878e585529037cb45b47267a4eaa98, content: 'Nous avons passé un séjour formidable. Merci aux personnes , le bonjours à Ricardo notre taxi man, t...', meta: {'language': 'fr'})]},\n",
        " 'en_writer': {'documents_written': 2},\n",
-       " 'fr_writer': {'documents_written': 3},\n",
+       " 'fr_writer': {'documents_written': 0},\n",
        " 'es_writer': {'documents_written': 2}}"
       ]
      },
-     "execution_count": 13,
+     "execution_count": 9,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -419,7 +335,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 10,
    "metadata": {
     "colab": {
      "base_uri": "https://localhost:8080/"
@@ -433,7 +349,7 @@
      "output_type": "stream",
      "text": [
       "English documents:  [Document(id=8f64ab234c6a5d5652d02bed144d069ec6e988903b071d16fffbf400abfc1047, content: 'The keypad with a code is convenient and the location is convenient. Basically everything else, very...', meta: {'language': 'en'}), Document(id=d4d878288efba5e28a43ae0195e43dadd0298fe36d3d9b3075c5c5120d27763e, content: 'It is very central and appartement has a nice appearance (even though a lot IKEA stuff), *W A R N I ...', meta: {'language': 'en'})]\n",
-      "French documents:  [Document(id=ea7ea338874232de2d8105a258813f50345db82772e21ad2c4549dbb7adce8a3, content: 'Super appartement. Juste au dessus de plusieurs bars qui ferment très tard. A savoir à l'avance. (Bo...', meta: {'language': 'fr'}), Document(id=6b64c8a60543ee32b81cd39bc8d6e09fae4bff1b22c6ccdcf414db26fa354e7a, content: 'Un peu salé surtout le sol. Manque de service et de souplesse', meta: {'language': 'fr'}), Document(id=b1be23526f19a8af80a190e775bfd05e65878e585529037cb45b47267a4eaa98, content: 'Nous avons passé un séjour formidable. Merci aux personnes , le bonjours à Ricardo notre taxi man, t...', meta: {'language': 'fr'})]\n",
+      "French documents:  []\n",
       "Spanish documents:  [Document(id=72b094c163b22a660528bc5adbdf0fecf96b4b4d753c1b117f15dba482d2f948, content: 'El apartamento estaba genial y muy céntrico, todo a mano. Al lado de la librería Lello y De la Torre...', meta: {'language': 'es'}), Document(id=4b37b8bdfffccfb3211ea167b4fdc5121ca51fc5f869b4f834e8da473f0d3353, content: 'Céntrico. Muy cómodo para moverse y ver Oporto. Edificio con terraza propia en la última planta. Tod...', meta: {'language': 'es'})]\n"
      ]
     }
@@ -459,7 +375,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 11,
    "metadata": {
     "colab": {
      "base_uri": "https://localhost:8080/"
@@ -489,7 +405,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": 12,
    "metadata": {
     "id": "CN1N2sn1yUVx"
    },
@@ -497,11 +413,12 @@
    "source": [
     "from haystack.components.retrievers.in_memory import InMemoryBM25Retriever\n",
     "from haystack.components.joiners import DocumentJoiner\n",
-    "from haystack.components.builders import PromptBuilder\n",
-    "from haystack.components.generators import OpenAIGenerator\n",
+    "from haystack.components.builders import ChatPromptBuilder\n",
+    "from haystack.components.generators.chat import OpenAIChatGenerator\n",
+    "from haystack.dataclasses import ChatMessage\n",
     "from haystack.components.routers import TextLanguageRouter\n",
     "\n",
-    "prompt_template = \"\"\"\n",
+    "prompt_template = [ChatMessage.from_user(\"\"\"\n",
     "You will be provided with reviews for an accommodation.\n",
     "Answer the question concisely based solely on the given reviews.\n",
     "Reviews:\n",
@@ -510,7 +427,7 @@
     "  {% endfor %}\n",
     "Question: {{ query}}\n",
     "Answer:\n",
-    "\"\"\""
+    "\"\"\")]"
    ]
   },
   {
@@ -525,19 +442,47 @@
     "- `TextLanguageRouter`\n",
     "- `InMemoryBM25Retriever`. You'll need a retriever per language, since each language has its own `DocumentStore`.\n",
     "- `DocumentJoiner`\n",
-    "- `PromptBuilder`\n",
-    "- `OpenAIGenerator`\n",
+    "- `ChatPromptBuilder`\n",
+    "- `OpenAIChatGenerator`\n",
     "\n",
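-    "> Note: The `BM25Retriever` essentially does keyword matching, which isn't as accurate as other search methods. In order to make the LLM responses more precise, you could refacctor your piplines to use an [`EmbeddingRetriever`](https://docs.haystack.deepset.ai/docs/inmemoryembeddingretriever) which performs vector search over the documents."
+    "> Note: The `BM25Retriever` essentially does keyword matching, which isn't as accurate as other search methods. In order to make the LLM responses more precise, you could refactor your pipelines to use an [`EmbeddingRetriever`](https://docs.haystack.deepset.ai/docs/inmemoryembeddingretriever) which performs vector search over the documents."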
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": 13,
    "metadata": {
     "id": "BN1Hr_BjWKcl"
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<haystack.core.pipeline.pipeline.Pipeline object at 0x30e5ac340>\n",
+       "🚅 Components\n",
+       "  - router: TextLanguageRouter\n",
+       "  - en_retriever: InMemoryBM25Retriever\n",
+       "  - fr_retriever: InMemoryBM25Retriever\n",
+       "  - es_retriever: InMemoryBM25Retriever\n",
+       "  - joiner: DocumentJoiner\n",
+       "  - prompt_builder: ChatPromptBuilder\n",
+       "  - llm: OpenAIChatGenerator\n",
+       "🛤️ Connections\n",
+       "  - router.en -> en_retriever.query (str)\n",
+       "  - router.fr -> fr_retriever.query (str)\n",
+       "  - router.es -> es_retriever.query (str)\n",
+       "  - en_retriever.documents -> joiner.documents (List[Document])\n",
+       "  - fr_retriever.documents -> joiner.documents (List[Document])\n",
+       "  - es_retriever.documents -> joiner.documents (List[Document])\n",
+       "  - joiner.documents -> prompt_builder.documents (List[Document])\n",
+       "  - prompt_builder.prompt -> llm.messages (List[ChatMessage])"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "rag_pipeline = Pipeline()\n",
     "rag_pipeline.add_component(instance=TextLanguageRouter([\"en\", \"fr\", \"es\"]), name=\"router\")\n",
@@ -545,8 +490,8 @@
     "rag_pipeline.add_component(instance=InMemoryBM25Retriever(document_store=fr_document_store), name=\"fr_retriever\")\n",
     "rag_pipeline.add_component(instance=InMemoryBM25Retriever(document_store=es_document_store), name=\"es_retriever\")\n",
     "rag_pipeline.add_component(instance=DocumentJoiner(), name=\"joiner\")\n",
-    "rag_pipeline.add_component(instance=PromptBuilder(template=prompt_template), name=\"prompt_builder\")\n",
-    "rag_pipeline.add_component(instance=OpenAIGenerator(), name=\"llm\")\n",
+    "rag_pipeline.add_component(instance=ChatPromptBuilder(template=prompt_template), name=\"prompt_builder\")\n",
+    "rag_pipeline.add_component(instance=OpenAIChatGenerator(), name=\"llm\")\n",
     "\n",
     "\n",
     "rag_pipeline.connect(\"router.en\", \"en_retriever.query\")\n",
@@ -556,7 +501,7 @@
     "rag_pipeline.connect(\"fr_retriever\", \"joiner\")\n",
     "rag_pipeline.connect(\"es_retriever\", \"joiner\")\n",
     "rag_pipeline.connect(\"joiner.documents\", \"prompt_builder.documents\")\n",
-    "rag_pipeline.connect(\"prompt_builder\", \"llm\")"
+    "rag_pipeline.connect(\"prompt_builder.prompt\", \"llm.messages\")"
    ]
   },
   {
@@ -570,7 +515,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 22,
+   "execution_count": 14,
    "metadata": {
     "colab": {
      "base_uri": "https://localhost:8080/",
@@ -595,7 +540,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 23,
+   "execution_count": 15,
    "metadata": {
     "colab": {
      "base_uri": "https://localhost:8080/",
@@ -617,22 +562,7 @@
     "id": "wj24fjXN0l6v",
     "outputId": "3c1eed33-c31c-4b72-bcda-fdd64744560b"
    },
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Ranking by BM25...: 100%|██████████| 2/2 [00:00<00:00, 3134.76 docs/s]"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "en_question = \"Is this apartment conveniently located?\"\n",
     "\n",
@@ -641,7 +571,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 24,
+   "execution_count": 16,
    "metadata": {
     "colab": {
      "base_uri": "https://localhost:8080/"
@@ -654,7 +584,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Yes, the apartment is conveniently located.\n"
+      "ChatMessage(content='Yes, the apartment is conveniently located.', role=<ChatRole.ASSISTANT: 'assistant'>, name=None, meta={'model': 'gpt-4o-mini-2024-07-18', 'index': 0, 'finish_reason': 'stop', 'usage': {'completion_tokens': 8, 'prompt_tokens': 365, 'total_tokens': 373, 'prompt_tokens_details': {'cached_tokens': 0, 'audio_tokens': 0}, 'completion_tokens_details': {'reasoning_tokens': 0, 'audio_tokens': 0, 'accepted_prediction_tokens': 0, 'rejected_prediction_tokens': 0}}})\n"
      ]
     }
    ],
@@ -673,7 +603,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 25,
+   "execution_count": 17,
    "metadata": {
     "colab": {
      "base_uri": "https://localhost:8080/",
@@ -695,15 +625,7 @@
     "id": "B4_Be1bs1jxJ",
     "outputId": "0b96cf29-d633-4c9b-f54c-a785e1c2cbe4"
    },
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Ranking by BM25...: 100%|██████████| 2/2 [00:00<00:00, 15887.52 docs/s]\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "es_question = \"¿El desayuno es genial?\"\n",
     "\n",
@@ -712,7 +634,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 26,
+   "execution_count": 18,
    "metadata": {
     "colab": {
      "base_uri": "https://localhost:8080/"
@@ -725,12 +647,12 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "No, el desayuno no es genial.\n"
+      "Sí, el desayuno es descrito como estupendo.\n"
      ]
     }
    ],
    "source": [
-    "print(result[\"llm\"][\"replies\"][0])"
+    "print(result[\"llm\"][\"replies\"][0].content)"
    ]
   },
   {
@@ -773,7 +695,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.12.4"
+   "version": "3.9.6"
   },
   "widgets": {
    "application/vnd.jupyter.widget-state+json": {