Commit dfca56f — first vignette draft: pure RAG

slobentanzer committed Jan 31, 2024 (parent: ba032a3)

Showing 12 changed files with 136 additions and 47 deletions.
12 changes: 6 additions & 6 deletions benchmark/results/preprocessed_for_frontend/entity_selection.csv
Model name,Passed test cases,Total test cases,Score,Iterations
gpt-3.5-turbo,4.0,4.0,1.0,2
gpt-4,4.0,4.0,1.0,2
llama-2-chat:7:ggmlv3:q2_K,2.0,4.0,0.5,2
llama-2-chat:7:ggmlv3:q4_0,2.0,4.0,0.5,2
llama-2-chat:7:ggmlv3:q5_0,2.0,4.0,0.5,2
llama-2-chat:7:ggmlv3:q8_0,2.0,4.0,0.5,2
llama-2-chat:13:ggmlv3:q4_0,1.0,4.0,0.25,2
mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,1.0,4.0,0.25,2
mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,1.0,4.0,0.25,2
llama-2-chat:13:ggmlv3:q2_K,0.0,4.0,0.0,2
llama-2-chat:13:ggmlv3:q5_0,0.0,4.0,0.0,2
llama-2-chat:13:ggmlv3:q8_0,0.0,4.0,0.0,2
mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,0.0,4.0,0.0,2
mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,0.0,4.0,0.0,2
Model name,Passed test cases,Total test cases,Score,Iterations
gpt-3.5-turbo,1.0,1.0,1.0,2
gpt-4,1.0,1.0,1.0,2
llama-2-chat:13:ggmlv3:q2_K,1.0,1.0,1.0,2
mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,1.0,1.0,1.0,2
mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,1.0,1.0,1.0,2
llama-2-chat:13:ggmlv3:q4_0,0.5,1.0,0.5,2
llama-2-chat:13:ggmlv3:q5_0,0.5,1.0,0.5,2
llama-2-chat:13:ggmlv3:q8_0,0.5,1.0,0.5,2
llama-2-chat:7:ggmlv3:q5_0,0.5,1.0,0.5,2
mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,0.5,1.0,0.5,2
mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,0.5,1.0,0.5,2
llama-2-chat:7:ggmlv3:q2_K,0.0,1.0,0.0,2
llama-2-chat:7:ggmlv3:q4_0,0.0,1.0,0.0,2
llama-2-chat:7:ggmlv3:q8_0,0.0,1.0,0.0,2
Model name,Mean
gpt-3.5-turbo,0.9246323529411764
gpt-4,0.8411239495798319
llama-2-chat:7:ggmlv3:q5_0,0.35714285714285715
mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,0.3348214285714286
mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,0.328125
mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,0.296875
mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,0.2901785714285714
llama-2-chat:13:ggmlv3:q8_0,0.28273809523809523
llama-2-chat:13:ggmlv3:q5_0,0.26785714285714285
llama-2-chat:7:ggmlv3:q4_0,0.26339285714285715
llama-2-chat:7:ggmlv3:q8_0,0.25297619047619047
llama-2-chat:13:ggmlv3:q4_0,0.2455357142857143
llama-2-chat:7:ggmlv3:q2_K,0.23030462184873948
llama-2-chat:13:ggmlv3:q2_K,0.19642857142857142
16 changes: 8 additions & 8 deletions benchmark/results/preprocessed_for_frontend/overview.csv
Model name,property_selection,query_generation,explicit_relevance_of_single_fragments,entity_selection,relationship_selection,end_to_end_query_generation,implicit_relevance_of_multiple_fragments,property_exists,Mean
gpt-3.5-turbo,0.6470588235294118,1.0,0.75,1.0,1.0,1.0,1.0,1.0,0.9246323529411764
gpt-4,0.7647058823529411,0.9642857142857144,0.0,1.0,1.0,1.0,1.0,1.0,0.8411239495798319
llama-2-chat:7:ggmlv3:q5_0,0.0,0.8571428571428571,0.0,0.5,0.5,0.0,0.5,0.5,0.35714285714285715
mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,0.0,0.9285714285714286,0.0,0.0,0.0,0.0,1.0,0.75,0.3348214285714286
mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,0.0,1.0,0.0,0.25,0.25,0.0,0.5,0.625,0.328125
mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,0.0,1.0,0.0,0.25,0.0,0.0,0.5,0.625,0.296875
mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,0.0,0.8214285714285714,0.0,0.0,0.0,0.0,1.0,0.5,0.2901785714285714
llama-2-chat:13:ggmlv3:q8_0,0.0,0.9285714285714286,0.0,0.0,0.0,0.0,0.5,0.8333333333333334,0.28273809523809523
llama-2-chat:13:ggmlv3:q5_0,0.0,0.8928571428571429,0.0,0.0,0.0,0.0,0.5,0.75,0.26785714285714285
llama-2-chat:7:ggmlv3:q4_0,0.0,0.6071428571428571,0.0,0.5,0.0,0.0,0.0,1.0,0.26339285714285715
llama-2-chat:7:ggmlv3:q8_0,0.0,0.8571428571428571,0.0,0.5,0.0,0.0,0.0,0.6666666666666666,0.25297619047619047
llama-2-chat:13:ggmlv3:q4_0,0.0,0.7142857142857143,0.0,0.25,0.0,0.0,0.5,0.5,0.2455357142857143
llama-2-chat:7:ggmlv3:q2_K,0.2352941176470588,0.6071428571428571,0.0,0.5,0.0,0.0,0.0,0.5,0.23030462184873948
llama-2-chat:13:ggmlv3:q2_K,0.0,0.5714285714285714,0.0,0.0,0.0,0.0,1.0,0.0,0.19642857142857142
16 changes: 8 additions & 8 deletions benchmark/results/preprocessed_for_frontend/property_exists.csv
Model name,Passed test cases,Total test cases,Score,Iterations
gpt-3.5-turbo,4.0,4.0,1.0,2
gpt-4,4.0,4.0,1.0,2
llama-2-chat:7:ggmlv3:q4_0,3.0,3.0,1.0,2
llama-2-chat:13:ggmlv3:q8_0,2.5,3.0,0.8333333333333334,2
llama-2-chat:13:ggmlv3:q5_0,1.5,2.0,0.75,2
mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,3.0,4.0,0.75,2
llama-2-chat:7:ggmlv3:q8_0,2.0,3.0,0.6666666666666666,2
mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,2.5,4.0,0.625,2
mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,2.5,4.0,0.625,2
llama-2-chat:13:ggmlv3:q4_0,1.0,2.0,0.5,2
llama-2-chat:7:ggmlv3:q2_K,1.0,2.0,0.5,2
llama-2-chat:7:ggmlv3:q5_0,2.5,5.0,0.5,2
mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,1.0,2.0,0.5,2
llama-2-chat:13:ggmlv3:q2_K,0.0,2.0,0.0,2
Model name,Passed test cases,Total test cases,Score,Iterations
gpt-4,13.0,17.0,0.7647058823529411,2
gpt-3.5-turbo,11.0,17.0,0.6470588235294118,2
llama-2-chat:7:ggmlv3:q2_K,4.0,17.0,0.23529411764705882,2
llama-2-chat:13:ggmlv3:q2_K,0.0,6.0,0.0,2
llama-2-chat:13:ggmlv3:q4_0,0.0,6.0,0.0,2
llama-2-chat:13:ggmlv3:q5_0,0.0,6.0,0.0,2
llama-2-chat:13:ggmlv3:q8_0,0.0,6.0,0.0,2
llama-2-chat:7:ggmlv3:q4_0,0.0,6.0,0.0,2
llama-2-chat:7:ggmlv3:q5_0,0.0,6.0,0.0,2
llama-2-chat:7:ggmlv3:q8_0,0.0,6.0,0.0,2
16 changes: 8 additions & 8 deletions benchmark/results/preprocessed_for_frontend/query_generation.csv
Model name,Passed test cases,Total test cases,Score,Iterations
gpt-3.5-turbo,14.0,14.0,1.0,2
mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,14.0,14.0,1.0,2
mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,14.0,14.0,1.0,2
gpt-4,13.5,14.0,0.9642857142857143,2
llama-2-chat:13:ggmlv3:q8_0,13.0,14.0,0.9285714285714286,2
mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,13.0,14.0,0.9285714285714286,2
llama-2-chat:13:ggmlv3:q5_0,12.5,14.0,0.8928571428571429,2
llama-2-chat:7:ggmlv3:q5_0,12.0,14.0,0.8571428571428571,2
llama-2-chat:7:ggmlv3:q8_0,12.0,14.0,0.8571428571428571,2
mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,11.5,14.0,0.8214285714285714,2
llama-2-chat:13:ggmlv3:q4_0,10.0,14.0,0.7142857142857143,2
llama-2-chat:7:ggmlv3:q2_K,8.5,14.0,0.6071428571428571,2
llama-2-chat:7:ggmlv3:q4_0,8.5,14.0,0.6071428571428571,2
llama-2-chat:13:ggmlv3:q2_K,8.0,14.0,0.5714285714285714,2
Model name,Passed test cases,Total test cases,Score,Iterations
gpt-3.5-turbo,6.0,6.0,1.0,2
gpt-4,6.0,6.0,1.0,2
llama-2-chat:7:ggmlv3:q5_0,3.0,6.0,0.5,2
mixtral-instruct-v0.1:46_7:ggufv2:Q5_0,1.5,6.0,0.25,2
llama-2-chat:13:ggmlv3:q2_K,0.0,6.0,0.0,2
llama-2-chat:13:ggmlv3:q4_0,0.0,6.0,0.0,2
llama-2-chat:13:ggmlv3:q5_0,0.0,6.0,0.0,2
llama-2-chat:13:ggmlv3:q8_0,0.0,6.0,0.0,2
llama-2-chat:7:ggmlv3:q2_K,0.0,6.0,0.0,2
llama-2-chat:7:ggmlv3:q4_0,0.0,6.0,0.0,2
llama-2-chat:7:ggmlv3:q8_0,0.0,6.0,0.0,2
mixtral-instruct-v0.1:46_7:ggufv2:Q2_K,0.0,6.0,0.0,2
mixtral-instruct-v0.1:46_7:ggufv2:Q4_0,0.0,6.0,0.0,2
mixtral-instruct-v0.1:46_7:ggufv2:Q8_0,0.0,6.0,0.0,2
Binary file added docs/rag-demo.gif
Binary file added docs/rag-settings.png
87 changes: 87 additions & 0 deletions docs/vignette-rag.md
# Vignette: Retrieval-Augmented Generation

This vignette demonstrates the RAG module of BioChatter as used by the
BioChatter Next application. This basic use case involves an LLM manuscript
reading assistant and a vector database with embedded scientific manuscripts.
The manuscripts are recent reviews on the topic of ABC transporters and drug
resistance phenomena, which we currently investigate in our DECIDER cohort.
Manuscripts such as these can be absent from an LLM's knowledge base for
various reasons, for instance because they are too recent or not openly
accessible. RAG makes it possible to retrieve relevant information from these
manuscripts and inject it into the LLM's generation process.

## Usage

In BioChatter Next, we first open the RAG settings by clicking the
`RAG Settings` button in the sidebar. In the settings dialog, we can activate
the functionality and upload any number of documents, limited only by the
scale of the vector database system. For demonstration purposes, we uploaded
the four manuscripts cited in the References section, which leads to the state
shown in the screenshot below.

![RAG Settings](rag-settings.png)

Note that we chose to split the documents into fragments of 1000 characters
each. We could instead have split by tokens, or chosen a different fragment
length and overlap. Fragment length, overlap, and the choice between token-
and character-based splitting should be decided in tandem with the choice of
LLM and the number of fragments to retrieve per query. Most importantly, the
total injected text must not exceed the input context length of the model.
Here, we chose to inject 10 fragments per query.
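
The character-based splitting described above can be sketched as follows. This
is a minimal illustration rather than BioChatter's actual implementation, and
the default `overlap` of 100 characters is an assumption for the example:

```python
def split_document(text: str, fragment_length: int = 1000, overlap: int = 100) -> list[str]:
    """Split text into fragments of at most `fragment_length` characters,
    where consecutive fragments share `overlap` characters of context."""
    if overlap >= fragment_length:
        raise ValueError("overlap must be smaller than the fragment length")
    step = fragment_length - overlap
    # Advance by (fragment_length - overlap) so neighboring fragments overlap;
    # max(..., 1) ensures short texts still yield a single fragment.
    return [text[i:i + fragment_length] for i in range(0, max(len(text) - overlap, 1), step)]
```

With these assumed settings, a 2500-character manuscript would yield three
fragments of 1000, 1000, and 700 characters, each sharing 100 characters with
its neighbor, which preserves sentences that would otherwise be cut at a
fragment boundary.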

We can now start a new conversation (using the `New Persona` button in the
sidebar), for which we select the `Research manuscript helper (RAG)` persona,
which includes suitable contextual prompts. Upon activating RAG in the chat
(to signal that we wish to use it for our question), we can enter into a
dialogue with the assistant. We use gpt-3.5-turbo in this example, a very
affordable model. The procedure is demonstrated in the GIF below.

![RAG Demo](rag-demo.gif)
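
At query time, the question is embedded, the most similar fragments are
retrieved from the vector database, and they are prepended to the LLM prompt.
The sketch below mimics that flow with a toy bag-of-words embedding and cosine
similarity; the real system uses a dedicated embedding model and a vector
database, so the function names and prompt template here are illustrative
assumptions only:

```python
import math
from collections import Counter

def embed(text: str) -> Counter:
    # Toy stand-in for a real embedding model: a bag-of-words count vector.
    return Counter(text.lower().split())

def cosine(a: Counter, b: Counter) -> float:
    dot = sum(a[k] * b[k] for k in a)
    na = math.sqrt(sum(v * v for v in a.values()))
    nb = math.sqrt(sum(v * v for v in b.values()))
    return dot / (na * nb) if na and nb else 0.0

def build_rag_prompt(question: str, fragments: list[str], k: int = 10) -> str:
    """Rank stored fragments by similarity to the question and prepend the
    top k to the prompt, as configured in the RAG settings (k = 10 above)."""
    q = embed(question)
    ranked = sorted(fragments, key=lambda f: cosine(q, embed(f)), reverse=True)
    context = "\n\n".join(ranked[:k])
    return f"Use the following context to answer.\n\n{context}\n\nQuestion: {question}"
```

The assistant's contextual prompts then instruct the model to ground its
answer in the injected fragments, which is what allows the cited statements
seen in the demo.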

## Comparison with ChatGPT

We can ask the same question of ChatGPT 4 (subscription access only). Using
web search, ChatGPT 4 is able to find the same study that the RAG process
retrieved and that we asked about in our follow-up question (Xu et al. 2015);
however, its answer is less detailed than the one produced by the RAG process
and references fewer specific studies. While it includes sensible information,
there is no way of validating most of the claims made by the model.
You can read the full answer
[here](https://chat.openai.com/share/549f5bf7-b39c-42a4-9483-d07dcbf5b2d9).
Overall, the quality of ChatGPT 4 is comparable to the RAG output, but the user
has less control over the knowledge used, and the answer cannot include
information that is not easily found on the web (for instance due to access
restrictions). Of note, we used gpt-3.5-turbo in this example, which is less
recent and much less expensive than GPT-4.

Asking the naive ChatGPT 3.5 (free access) the same question yields a very
different answer. The free version of ChatGPT cannot search the internet, and
does not provide any citations for the general answer it provides. You can read
the full answer
[here](https://chat.openai.com/share/810075aa-69fe-4c06-859d-229e250f4c41).

## Conclusion

In conclusion, the RAG procedure we implement dramatically improves the
quality of gpt-3.5-turbo answers, making them comparable to GPT-4 with web
search, and even surpassing it in some aspects. We invite you to try it out yourself, as
we provide an instance of BioChatter Next, including GPT access and a vector
database instance, free of charge at
[https://next.biochatter.org](https://next.biochatter.org).

## References

- [1] [ABC transporters affects tumor immune microenvironment to regulate cancer
immunotherapy and multidrug
resistance](https://doi.org/10.1016/j.drup.2022.100905)

- [2] [ABC transporters and the hallmarks of cancer: roles in cancer
aggressiveness beyond multidrug
resistance](https://doi.org/10.20892/j.issn.2095-3941.2019.0284)

- [3] [Advances in the structure, mechanism and targeting of
chemoresistance-linked ABC
transporters](https://doi.org/10.1038/s41568-023-00612-3)

- [4] [ATP-binding cassette (ABC) transporters in cancer: A review of recent
updates](https://doi.org/10.1111/jebm.12434)
2 changes: 2 additions & 0 deletions mkdocs.yml
nav:
  - Benchmark:
      - Overview: benchmark-overview.md
      - All Results: benchmark-results.md
  - Vignettes:
      - Retrieval-Augmented Generation: vignette-rag.md
  - API Reference:
      - LLM Connectivity: llm_connect-reference.md
      - Vectorstore: vectorstore-reference.md
