Skip to content

Commit

Permalink
Updated tutorial 32
Browse files Browse the repository at this point in the history
  • Loading branch information
Amnah199 committed Dec 8, 2024
1 parent 19c4d95 commit 086a13e
Show file tree
Hide file tree
Showing 2 changed files with 211 additions and 201 deletions.
148 changes: 118 additions & 30 deletions tutorials/30_File_Type_Preprocessing_Index_Pipeline.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -91,11 +91,20 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"metadata": {
"id": "CkvJIU7FmDf9"
},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/amna.mubashar/Library/Python/3.9/lib/python/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
}
],
"source": [
"from haystack.telemetry import tutorial_running\n",
"\n",
Expand All @@ -117,9 +126,22 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 3,
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"['recipe_files/vegan_flan_recipe.md',\n",
" 'recipe_files/vegan_keto_eggplant_recipe_fixed.pdf',\n",
" 'recipe_files/vegan_sunflower_hemp_cheese_recipe.txt']"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import gdown\n",
"\n",
Expand Down Expand Up @@ -180,7 +202,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 5,
"metadata": {
"id": "hCWlpiQCBYOg"
},
Expand All @@ -201,7 +223,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 6,
"metadata": {
"id": "TVXSX0GHBtdj"
},
Expand All @@ -222,7 +244,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 7,
"metadata": {
"id": "4yGXKHEXIZxi"
},
Expand Down Expand Up @@ -251,7 +273,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 8,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
Expand All @@ -260,7 +282,39 @@
"id": "gafXWtNYfNbr",
"outputId": "10f351de-ac09-4273-85a2-ac7b59fb2f77"
},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"<haystack.core.pipeline.pipeline.Pipeline object at 0x320eb2790>\n",
"🚅 Components\n",
" - file_type_router: FileTypeRouter\n",
" - text_file_converter: TextFileToDocument\n",
" - markdown_converter: MarkdownToDocument\n",
" - pypdf_converter: PyPDFToDocument\n",
" - document_joiner: DocumentJoiner\n",
" - document_cleaner: DocumentCleaner\n",
" - document_splitter: DocumentSplitter\n",
" - document_embedder: SentenceTransformersDocumentEmbedder\n",
" - document_writer: DocumentWriter\n",
"🛤️ Connections\n",
" - file_type_router.text/plain -> text_file_converter.sources (List[Union[str, Path, ByteStream]])\n",
" - file_type_router.application/pdf -> pypdf_converter.sources (List[Union[str, Path, ByteStream]])\n",
" - file_type_router.text/markdown -> markdown_converter.sources (List[Union[str, Path, ByteStream]])\n",
" - text_file_converter.documents -> document_joiner.documents (List[Document])\n",
" - markdown_converter.documents -> document_joiner.documents (List[Document])\n",
" - pypdf_converter.documents -> document_joiner.documents (List[Document])\n",
" - document_joiner.documents -> document_cleaner.documents (List[Document])\n",
" - document_cleaner.documents -> document_splitter.documents (List[Document])\n",
" - document_splitter.documents -> document_embedder.documents (List[Document])\n",
" - document_embedder.documents -> document_writer.documents (List[Document])"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"preprocessing_pipeline.connect(\"file_type_router.text/plain\", \"text_file_converter.sources\")\n",
"preprocessing_pipeline.connect(\"file_type_router.application/pdf\", \"pypdf_converter.sources\")\n",
Expand Down Expand Up @@ -325,7 +379,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 10,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
Expand Down Expand Up @@ -357,7 +411,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 11,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
Expand All @@ -366,14 +420,35 @@
"id": "_s--8xEWq8Y9",
"outputId": "1c050d5f-f2ae-4cd3-e0d4-533397a6af63"
},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"<haystack.core.pipeline.pipeline.Pipeline object at 0x3737807f0>\n",
"🚅 Components\n",
" - embedder: SentenceTransformersTextEmbedder\n",
" - retriever: InMemoryEmbeddingRetriever\n",
" - chat_prompt_builder: ChatPromptBuilder\n",
" - llm: HuggingFaceAPIChatGenerator\n",
"🛤️ Connections\n",
" - embedder.embedding -> retriever.query_embedding (List[float])\n",
" - retriever.documents -> chat_prompt_builder.documents (List[Document])\n",
" - chat_prompt_builder.prompt -> llm.messages (List[ChatMessage])"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from haystack.components.embedders import SentenceTransformersTextEmbedder\n",
"from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever\n",
"from haystack.components.builders import PromptBuilder\n",
"from haystack.components.generators import HuggingFaceAPIGenerator\n",
"from haystack.components.builders import ChatPromptBuilder\n",
"from haystack.dataclasses import ChatMessage\n",
"from haystack.components.generators.chat import HuggingFaceAPIChatGenerator\n",
"\n",
"template = \"\"\"\n",
"template = [ChatMessage.from_user(\"\"\"\n",
"Answer the questions based on the given context.\n",
"\n",
"Context:\n",
Expand All @@ -383,19 +458,19 @@
"\n",
"Question: {{ question }}\n",
"Answer:\n",
"\"\"\"\n",
"\"\"\")]\n",
"pipe = Pipeline()\n",
"pipe.add_component(\"embedder\", SentenceTransformersTextEmbedder(model=\"sentence-transformers/all-MiniLM-L6-v2\"))\n",
"pipe.add_component(\"retriever\", InMemoryEmbeddingRetriever(document_store=document_store))\n",
"pipe.add_component(\"prompt_builder\", PromptBuilder(template=template))\n",
"pipe.add_component(\"chat_prompt_builder\", ChatPromptBuilder(template=template))\n",
"pipe.add_component(\n",
" \"llm\",\n",
" HuggingFaceAPIGenerator(api_type=\"serverless_inference_api\", api_params={\"model\": \"HuggingFaceH4/zephyr-7b-beta\"}),\n",
" HuggingFaceAPIChatGenerator(api_type=\"serverless_inference_api\", api_params={\"model\": \"HuggingFaceH4/zephyr-7b-beta\"}),\n",
")\n",
"\n",
"pipe.connect(\"embedder.embedding\", \"retriever.query_embedding\")\n",
"pipe.connect(\"retriever\", \"prompt_builder.documents\")\n",
"pipe.connect(\"prompt_builder\", \"llm\")"
"pipe.connect(\"retriever\", \"chat_prompt_builder.documents\")\n",
"pipe.connect(\"chat_prompt_builder.prompt\", \"llm.messages\")"
]
},
{
Expand All @@ -409,11 +484,29 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 13,
"metadata": {
"id": "qDqrU5emtBWQ"
},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Batches: 100%|██████████| 1/1 [00:00<00:00, 3.20it/s]\n"
]
},
{
"data": {
"text/plain": [
"{'llm': {'replies': [ChatMessage(content=\"For vegan keto eggplant lasagna:\\n- 2 large eggplants\\n- Hella salt (optional)\\n- 1/2 cup store-bought vegan mozzarella cheese (for topping)\\n- Pesto:\\n - 4 oz basil (generally one large clamshell or 2 small ones)\\n - 1/4 cup almonds\\n - 1/4 cup nutritional yeast\\n - 1/4 cup olive oil\\n- Spinach tofu ricotta:\\n - 1 recipe spinach tofu ricotta\\n- 1 tsp garlic powder\\n- Juice of half a lemon\\n- Salt to taste\\n\\nFor macadamia nut cheese:\\n- 1 cup macadamia nuts (unsalted and unroasted)\\n- Salt (optional)\\n\\nInstructions:\\n1. Preheat oven to 400°F.\\n2. Slice eggplants into 1/4 inch thick slices and rub both sides with salt. Let sit for 20-30 minutes to extract moisture. Rinse with water and pat dry.\\n3. Roast the eggplant in the oven for about 20 minutes or until they're soft and brown in spots, rotating the pans halfway through.\\n4. Reduce oven temperature to 350°F.\\n5. In a separate bowl, mix together the store-bought vegan mozzarella cheese (for topping) with spinach tofu ricotta.\\n6. Assemble the lasagna: spread a layer of roasted eggplant at the bottom of the casserole dish, followed by a layer of pesto and a layer of the cheese mixture. Repeat until all ingredients are used, finishing with a layer of roasted eggplant. Sprinkle the remaining store-bought vegan mozzarella cheese (for topping) on top.\\n7. Bake for 25 minutes. Optionally, broil for 1-2 minutes at the end to melt the cheese.\\n\\nFor vegan persimmon flan:\\n- 1/2 cup persimmon pulp, strained (about 2 medium persimmons)\\n- 1 tbsp cornstarch\\n- 1/2 tsp agar agar\\n-\", role=<ChatRole.ASSISTANT: 'assistant'>, name=None, meta={'model': 'HuggingFaceH4/zephyr-7b-beta', 'finish_reason': 'length', 'index': 0, 'usage': ChatCompletionOutputUsage(completion_tokens=512, prompt_tokens=2276, total_tokens=2788)})]}}"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"question = (\n",
" \"What ingredients would I need to make vegan keto eggplant lasagna, vegan persimmon flan, and vegan hemp cheese?\"\n",
Expand All @@ -422,8 +515,8 @@
"pipe.run(\n",
" {\n",
" \"embedder\": {\"text\": question},\n",
" \"prompt_builder\": {\"question\": question},\n",
" \"llm\": {\"generation_kwargs\": {\"max_new_tokens\": 350}},\n",
" \"chat_prompt_builder\": {\"question\": question},\n",
" \n",
" }\n",
")"
]
Expand All @@ -434,12 +527,7 @@
"id": "ZJueu_V4KP6w"
},
"source": [
"```python\n",
"{'llm': {'replies': [\"\\n\\nVegan Keto Eggplant Lasagna:\\n\\nIngredients:\\n- 2 large eggplants\\n- A lot of salt (you should have this in your house already)\\n- 1/2 cup store-bought vegan mozzarella (for topping)\\n\\nPesto:\\n- 4 oz basil (generally one large clamshell or 2 small ones)\\n- 1/4 cup almonds\\n- 1/4 cup nutritional yeast\\n- 1/4 cup olive oil\\n- 1 recipe vegan pesto (you can find this in the recipe)\\n- 1 recipe spinach tofu ricotta (you can find this in the recipe)\\n- 1 tsp garlic powder\\n- Juice of half a lemon\\n- Salt to taste\\n\\nSpinach Tofu Ricotta:\\n- 10 oz firm or extra firm tofu\\n- Juice of 1 lemon\\n- Garlic powder to taste\\n- Salt to taste\\n\\nInstructions:\\n1. Slice the eggplants into 1/4 inch thick slices. Some slices will need to be scrapped because it's difficult to get them all uniformly thin. Use them in soup or something, IDK, man.\\n2. Take the eggplant slices and rub both sides with salt. Don't be shy about how much, you're gonna rinse it off anyway.\\n3. Put them in a colander with something underneath it and let them sit for half an hour. This draws the water out so that the egg\"],\n",
" 'meta': [{'model': 'HuggingFaceH4/zephyr-7b-beta',\n",
" ...\n",
" }]}}\n",
"```"
"```python\n",
"{'llm': {'replies': [ChatMessage(content=\"For vegan keto eggplant lasagna:\\n- 2 large eggplants\\n- Hella salt (optional)\\n- 1/2 cup store-bought vegan mozzarella cheese (for topping)\\n- Pesto:\\n - 4 oz basil (generally one large clamshell or 2 small ones)\\n - 1/4 cup almonds\\n - 1/4 cup nutritional yeast\\n - 1/4 cup olive oil\\n- Spinach tofu ricotta:\\n - 1 recipe spinach tofu ricotta\\n- 1 tsp garlic powder\\n- Juice of half a lemon\\n- Salt to taste\\n\\nFor macadamia nut cheese:\\n- 1 cup macadamia nuts (unsalted and unroasted)\\n- Salt (optional)\\n\\nInstructions:\\n1. Preheat oven to 400°F.\\n2. Slice eggplants into 1/4 inch thick slices and rub both sides with salt. Let sit for 20-30 minutes to extract moisture. Rinse with water and pat dry.\\n3. Roast the eggplant in the oven for about 20 minutes or until they're soft and brown in spots, rotating the pans halfway through.\\n4. Reduce oven temperature to 350°F.\\n5. In a separate bowl, mix together the store-bought vegan mozzarella cheese (for topping) with spinach tofu ricotta.\\n6. Assemble the lasagna: spread a layer of roasted eggplant at the bottom of the casserole dish, followed by a layer of pesto and a layer of the cheese mixture. Repeat until all ingredients are used, finishing with a layer of roasted eggplant. Sprinkle the remaining store-bought vegan mozzarella cheese (for topping) on top.\\n7. Bake for 25 minutes. Optionally, broil for 1-2 minutes at the end to melt the cheese.\\n\\nFor vegan persimmon flan:\\n- 1/2 cup persimmon pulp, strained (about 2 medium persimmons)\\n- 1 tbsp cornstarch\\n- 1/2 tsp agar agar\\n-\", role=<ChatRole.ASSISTANT: 'assistant'>, name=None, meta={'model': 'HuggingFaceH4/zephyr-7b-beta', 'finish_reason': 'length', 'index': 0, 'usage': ChatCompletionOutputUsage(completion_tokens=512, prompt_tokens=2276, total_tokens=2788)})]}}\n",
"```"
]
},
{
Expand Down
Loading

0 comments on commit 086a13e

Please sign in to comment.