diff --git a/frontend/src/components/Graph/LegendsChip.tsx b/frontend/src/components/Graph/LegendsChip.tsx
index 7d2de8b74..2fdc16df2 100644
--- a/frontend/src/components/Graph/LegendsChip.tsx
+++ b/frontend/src/components/Graph/LegendsChip.tsx
@@ -1,12 +1,14 @@
-import { useMemo } from 'react';
import { LegendChipProps } from '../../types';
import Legend from '../UI/Legend';
-export const LegendsChip: React.FunctionComponent<LegendChipProps> = ({ scheme, title, nodes }) => {
- const chunkcount = useMemo(
- () => [...new Set(nodes?.filter((n) => n?.labels?.includes(title)).map((i) => i.id))].length,
- []
+export const LegendsChip: React.FunctionComponent<LegendChipProps> = ({
+ scheme,
+ label,
+ type,
+ count,
+ onClick,
+}) => {
+  return (
+    <Legend title={label} bg={scheme[label]} type={type} count={count} onClick={onClick} />
+  );
};
-export default QuickStarter;
\ No newline at end of file
+export default QuickStarter;
diff --git a/frontend/src/services/GetFiles.ts b/frontend/src/services/GetFiles.ts
index 0744ff047..056a9cc05 100644
--- a/frontend/src/services/GetFiles.ts
+++ b/frontend/src/services/GetFiles.ts
@@ -5,9 +5,7 @@ export const getSourceNodes = async (userCredentials: UserCredentials) => {
try {
const encodedstr = btoa(userCredentials.password);
const response = await api.get(
- `/sources_list?uri=${userCredentials.uri}&database=${userCredentials.database}&userName=${
- userCredentials.userName
- }&password=${encodedstr}`
+ `/sources_list?uri=${userCredentials.uri}&database=${userCredentials.database}&userName=${userCredentials.userName}&password=${encodedstr}`
);
return response;
} catch (error) {
diff --git a/frontend/src/utils/Constants.ts b/frontend/src/utils/Constants.ts
index ee2296a3a..4222e8a7b 100644
--- a/frontend/src/utils/Constants.ts
+++ b/frontend/src/utils/Constants.ts
@@ -59,7 +59,7 @@ export const defaultLLM = llms?.includes('openai-gpt-4o-mini')
export const chatModes =
process.env?.VITE_CHAT_MODES?.trim() != ''
? process.env.VITE_CHAT_MODES?.split(',')
- : ['vector', 'graph', 'graph+vector', 'fulltext', 'fulltext+graph'];
+ : ['vector', 'graph', 'graph+vector', 'fulltext', 'graph+vector+fulltext'];
export const chunkSize = process.env.VITE_CHUNK_SIZE ? parseInt(process.env.VITE_CHUNK_SIZE) : 1 * 1024 * 1024;
export const timeperpage = process.env.VITE_TIME_PER_PAGE ? parseInt(process.env.VITE_TIME_PER_PAGE) : 50;
export const timePerByte = 0.2;
@@ -232,7 +232,7 @@ export const graphLabels = {
noEntities: 'No Entities Found',
selectCheckbox: 'Select atleast one checkbox for graph view',
totalRelationships: 'Total Relationships',
- nodeSize: 30
+ nodeSize: 30,
};
export const RESULT_STEP_SIZE = 25;
From 7d0b431728ce642cdbddc9c033b84b0b2c8c92f6 Mon Sep 17 00:00:00 2001
From: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com>
Date: Thu, 22 Aug 2024 12:50:41 +0000
Subject: [PATCH 019/292] mode added to info modal for entities
---
frontend/src/components/ChatBot/ChatInfoModal.tsx | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/frontend/src/components/ChatBot/ChatInfoModal.tsx b/frontend/src/components/ChatBot/ChatInfoModal.tsx
index 42dad1893..25391b00e 100644
--- a/frontend/src/components/ChatBot/ChatInfoModal.tsx
+++ b/frontend/src/components/ChatBot/ChatInfoModal.tsx
@@ -175,7 +175,7 @@ const ChatInfoModal: React.FC = ({
) : (
{mode != 'graph' ? Sources used : <></>}
- {mode === 'graph+vector' || mode === 'graph' ? Top Entities used : <></>}
+ {mode === 'graph+vector' || mode === 'graph' || mode === 'graph+vector+fulltext' ? Top Entities used : <></>}
{mode === 'graph' && cypher_query?.trim().length ? (
Generated Cypher Query
) : (
From f2b1e172a70c8dfd63cda55007fa5ed4371e64a0 Mon Sep 17 00:00:00 2001
From: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com>
Date: Mon, 26 Aug 2024 09:07:54 +0000
Subject: [PATCH 020/292] Issue fixed: list index out of range while getting
 status of document node
---
backend/src/main.py | 160 +++++++++++++++++++++++---------------------
1 file changed, 83 insertions(+), 77 deletions(-)
diff --git a/backend/src/main.py b/backend/src/main.py
index f7dd190ef..16eb0e622 100644
--- a/backend/src/main.py
+++ b/backend/src/main.py
@@ -277,91 +277,97 @@ def processing_source(uri, userName, password, database, model, file_name, pages
create_chunks_obj = CreateChunksofDocument(pages, graph)
chunks = create_chunks_obj.split_file_into_chunks()
chunkId_chunkDoc_list = create_relation_between_chunks(graph,file_name,chunks)
- if result[0]['Status'] != 'Processing':
- obj_source_node = sourceNode()
- status = "Processing"
- obj_source_node.file_name = file_name
- obj_source_node.status = status
- obj_source_node.total_chunks = len(chunks)
- obj_source_node.total_pages = len(pages)
- obj_source_node.model = model
- logging.info(file_name)
- logging.info(obj_source_node)
- graphDb_data_Access.update_source_node(obj_source_node)
-
- logging.info('Update the status as Processing')
- update_graph_chunk_processed = int(os.environ.get('UPDATE_GRAPH_CHUNKS_PROCESSED'))
- # selected_chunks = []
- is_cancelled_status = False
- job_status = "Completed"
- node_count = 0
- rel_count = 0
- for i in range(0, len(chunkId_chunkDoc_list), update_graph_chunk_processed):
- select_chunks_upto = i+update_graph_chunk_processed
- logging.info(f'Selected Chunks upto: {select_chunks_upto}')
- if len(chunkId_chunkDoc_list) <= select_chunks_upto:
- select_chunks_upto = len(chunkId_chunkDoc_list)
- selected_chunks = chunkId_chunkDoc_list[i:select_chunks_upto]
+
+ if len(result) > 0:
+ if result[0]['Status'] != 'Processing':
+ obj_source_node = sourceNode()
+ status = "Processing"
+ obj_source_node.file_name = file_name
+ obj_source_node.status = status
+ obj_source_node.total_chunks = len(chunks)
+ obj_source_node.total_pages = len(pages)
+ obj_source_node.model = model
+ logging.info(file_name)
+ logging.info(obj_source_node)
+ graphDb_data_Access.update_source_node(obj_source_node)
+
+ logging.info('Update the status as Processing')
+ update_graph_chunk_processed = int(os.environ.get('UPDATE_GRAPH_CHUNKS_PROCESSED'))
+ # selected_chunks = []
+ is_cancelled_status = False
+ job_status = "Completed"
+ node_count = 0
+ rel_count = 0
+ for i in range(0, len(chunkId_chunkDoc_list), update_graph_chunk_processed):
+ select_chunks_upto = i+update_graph_chunk_processed
+ logging.info(f'Selected Chunks upto: {select_chunks_upto}')
+ if len(chunkId_chunkDoc_list) <= select_chunks_upto:
+ select_chunks_upto = len(chunkId_chunkDoc_list)
+ selected_chunks = chunkId_chunkDoc_list[i:select_chunks_upto]
+ result = graphDb_data_Access.get_current_status_document_node(file_name)
+ is_cancelled_status = result[0]['is_cancelled']
+ logging.info(f"Value of is_cancelled : {result[0]['is_cancelled']}")
+ if bool(is_cancelled_status) == True:
+ job_status = "Cancelled"
+ logging.info('Exit from running loop of processing file')
+ exit
+ else:
+ node_count,rel_count = processing_chunks(selected_chunks,graph,uri, userName, password, database,file_name,model,allowedNodes,allowedRelationship,node_count, rel_count)
+ end_time = datetime.now()
+ processed_time = end_time - start_time
+
+ obj_source_node = sourceNode()
+ obj_source_node.file_name = file_name
+ obj_source_node.updated_at = end_time
+ obj_source_node.processing_time = processed_time
+ obj_source_node.node_count = node_count
+ obj_source_node.processed_chunk = select_chunks_upto
+ obj_source_node.relationship_count = rel_count
+ graphDb_data_Access.update_source_node(obj_source_node)
+
result = graphDb_data_Access.get_current_status_document_node(file_name)
is_cancelled_status = result[0]['is_cancelled']
- logging.info(f"Value of is_cancelled : {result[0]['is_cancelled']}")
if bool(is_cancelled_status) == True:
- job_status = "Cancelled"
- logging.info('Exit from running loop of processing file')
- exit
- else:
- node_count,rel_count = processing_chunks(selected_chunks,graph,uri, userName, password, database,file_name,model,allowedNodes,allowedRelationship,node_count, rel_count)
- end_time = datetime.now()
- processed_time = end_time - start_time
-
- obj_source_node = sourceNode()
- obj_source_node.file_name = file_name
- obj_source_node.updated_at = end_time
- obj_source_node.processing_time = processed_time
- obj_source_node.node_count = node_count
- obj_source_node.processed_chunk = select_chunks_upto
- obj_source_node.relationship_count = rel_count
- graphDb_data_Access.update_source_node(obj_source_node)
-
- result = graphDb_data_Access.get_current_status_document_node(file_name)
- is_cancelled_status = result[0]['is_cancelled']
- if bool(is_cancelled_status) == True:
- logging.info(f'Is_cancelled True at the end extraction')
- job_status = 'Cancelled'
- logging.info(f'Job Status at the end : {job_status}')
- end_time = datetime.now()
- processed_time = end_time - start_time
- obj_source_node = sourceNode()
- obj_source_node.file_name = file_name
- obj_source_node.status = job_status
- obj_source_node.processing_time = processed_time
+ logging.info(f'Is_cancelled True at the end of extraction')
+ job_status = 'Cancelled'
+ logging.info(f'Job Status at the end : {job_status}')
+ end_time = datetime.now()
+ processed_time = end_time - start_time
+ obj_source_node = sourceNode()
+ obj_source_node.file_name = file_name
+ obj_source_node.status = job_status
+ obj_source_node.processing_time = processed_time
- graphDb_data_Access.update_source_node(obj_source_node)
- logging.info('Updated the nodeCount and relCount properties in Document node')
- logging.info(f'file:{file_name} extraction has been completed')
+ graphDb_data_Access.update_source_node(obj_source_node)
+ logging.info('Updated the nodeCount and relCount properties in Document node')
+ logging.info(f'file:{file_name} extraction has been completed')
- # merged_file_path have value only when file uploaded from local
-
- if is_uploaded_from_local:
- gcs_file_cache = os.environ.get('GCS_FILE_CACHE')
- if gcs_file_cache == 'True':
- folder_name = create_gcs_bucket_folder_name_hashed(uri, file_name)
- delete_file_from_gcs(BUCKET_UPLOAD,folder_name,file_name)
- else:
- delete_uploaded_local_file(merged_file_path, file_name)
+ # merged_file_path have value only when file uploaded from local
- return {
- "fileName": file_name,
- "nodeCount": node_count,
- "relationshipCount": rel_count,
- "processingTime": round(processed_time.total_seconds(),2),
- "status" : job_status,
- "model" : model,
- "success_count" : 1
- }
+ if is_uploaded_from_local:
+ gcs_file_cache = os.environ.get('GCS_FILE_CACHE')
+ if gcs_file_cache == 'True':
+ folder_name = create_gcs_bucket_folder_name_hashed(uri, file_name)
+ delete_file_from_gcs(BUCKET_UPLOAD,folder_name,file_name)
+ else:
+ delete_uploaded_local_file(merged_file_path, file_name)
+
+ return {
+ "fileName": file_name,
+ "nodeCount": node_count,
+ "relationshipCount": rel_count,
+ "processingTime": round(processed_time.total_seconds(),2),
+ "status" : job_status,
+ "model" : model,
+ "success_count" : 1
+ }
+ else:
+ logging.info('File is not processed because it\'s already in Processing status')
else:
- logging.info('File does not process because it\'s already in Processing status')
+ error_message = "Unable to get the status of docuemnt node."
+ logging.error(error_message)
+ raise Exception(error_message)
def processing_chunks(chunkId_chunkDoc_list,graph,uri, userName, password, database,file_name,model,allowedNodes,allowedRelationship, node_count, rel_count):
#create vector index and update chunk node with embedding
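The hunk above fixes the reported IndexError by checking `len(result) > 0` before reading `result[0]`, and nests the whole batch loop under that guard. A minimal sketch of the same guard-and-batch pattern, with illustrative helper names standing in for the project's `graphDBdataAccess` calls (not the actual implementation):

```python
# Sketch of the guarded batch loop from processing_source (names are illustrative).
import logging

def process_in_batches(chunks, batch_size, get_status, process_batch):
    """Process `chunks` in slices of `batch_size`, checking for cancellation
    before each slice. `get_status` must return a list of status rows; an
    empty list is treated as an error instead of raising IndexError."""
    status_rows = get_status()
    if not status_rows:                       # the fix: guard before indexing row [0]
        raise Exception("Unable to get the status of document node.")
    if status_rows[0]['Status'] == 'Processing':
        logging.info("File is already in Processing status; skipping.")
        return 'Skipped'
    for i in range(0, len(chunks), batch_size):
        batch = chunks[i:i + batch_size]
        rows = get_status()
        if rows and rows[0]['is_cancelled']:  # honour user cancellation mid-run
            logging.info("Cancelled; stopping batch loop.")
            return 'Cancelled'
        process_batch(batch)
    return 'Completed'
```

Note that the original loop signals cancellation with a bare `exit`, which is a no-op expression at that point; the sketch returns instead so the loop genuinely stops.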
From 94c493eaef19386e07793f253aee578c54e44509 Mon Sep 17 00:00:00 2001
From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
Date: Mon, 26 Aug 2024 11:55:36 +0000
Subject: [PATCH 021/292] processing count updated on cancel
---
frontend/src/components/Content.tsx | 2 +-
frontend/src/components/FileTable.tsx | 7 +++++++
frontend/src/hooks/useSse.tsx | 2 +-
3 files changed, 9 insertions(+), 2 deletions(-)
diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx
index ec4fad9bb..a1a1e69b1 100644
--- a/frontend/src/components/Content.tsx
+++ b/frontend/src/components/Content.tsx
@@ -381,7 +381,7 @@ const Content: React.FC = ({
setextractLoading(false);
await postProcessing(userCredentials as UserCredentials, postProcessingTasks);
});
- } else if (queueFiles && !queue.isEmpty()) {
+ } else if (queueFiles && !queue.isEmpty()&&processingFilesCount<batchSize) {
setextractLoading(false);
diff --git a/frontend/src/components/FileTable.tsx b/frontend/src/components/FileTable.tsx
index 70dff8f1a..8c334a7b4 100644
--- a/frontend/src/components/FileTable.tsx
+++ b/frontend/src/components/FileTable.tsx
@@ -761,6 +761,13 @@ const FileTable = forwardRef((props, ref) => {
return curfile;
})
);
+ setProcessedCount((prev) => {
+ if (prev == batchSize) {
+ return batchSize - 1;
+ }
+ return prev + 1;
+ });
+ queue.remove(fileName)
} else {
let errorobj = { error: res.data.error, message: res.data.message, fileName };
throw new Error(JSON.stringify(errorobj));
diff --git a/frontend/src/hooks/useSse.tsx b/frontend/src/hooks/useSse.tsx
index f8a07f61e..36be24c7b 100644
--- a/frontend/src/hooks/useSse.tsx
+++ b/frontend/src/hooks/useSse.tsx
@@ -45,7 +45,7 @@ export default function useServerSideEvent(
});
});
}
- } else if (status === 'Completed' || status === 'Cancelled') {
+ } else if (status === 'Completed') {
setFilesData((prevfiles) => {
return prevfiles.map((curfile) => {
if (curfile.name == fileName) {
From 3ef88b65492aee3c6fd9f1c0d70d4be424bb3869 Mon Sep 17 00:00:00 2001
From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
Date: Tue, 27 Aug 2024 10:04:19 +0000
Subject: [PATCH 022/292] format fixes
---
.../src/components/ChatBot/ChatInfoModal.tsx | 6 +++++-
frontend/src/components/Content.tsx | 2 +-
frontend/src/components/FileTable.tsx | 2 +-
frontend/src/components/Graph/GraphViewModal.tsx | 16 ++++++++--------
.../Deduplication/index.tsx | 4 ++--
5 files changed, 17 insertions(+), 13 deletions(-)
diff --git a/frontend/src/components/ChatBot/ChatInfoModal.tsx b/frontend/src/components/ChatBot/ChatInfoModal.tsx
index 25391b00e..89c15ea72 100644
--- a/frontend/src/components/ChatBot/ChatInfoModal.tsx
+++ b/frontend/src/components/ChatBot/ChatInfoModal.tsx
@@ -175,7 +175,11 @@ const ChatInfoModal: React.FC = ({
) : (
{mode != 'graph' ? Sources used : <></>}
- {mode === 'graph+vector' || mode === 'graph' || mode === 'graph+vector+fulltext' ? Top Entities used : <></>}
+ {mode === 'graph+vector' || mode === 'graph' || mode === 'graph+vector+fulltext' ? (
+ Top Entities used
+ ) : (
+ <></>
+ )}
{mode === 'graph' && cypher_query?.trim().length ? (
Generated Cypher Query
) : (
diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx
index a1a1e69b1..64945d5d5 100644
--- a/frontend/src/components/Content.tsx
+++ b/frontend/src/components/Content.tsx
@@ -381,7 +381,7 @@ const Content: React.FC = ({
setextractLoading(false);
await postProcessing(userCredentials as UserCredentials, postProcessingTasks);
});
- } else if (queueFiles && !queue.isEmpty()&&processingFilesCount<batchSize) {
+ } else if (queueFiles && !queue.isEmpty() && processingFilesCount < batchSize) {
setextractLoading(false);
diff --git a/frontend/src/components/FileTable.tsx b/frontend/src/components/FileTable.tsx
index 8c334a7b4..23310eaf4 100644
--- a/frontend/src/components/FileTable.tsx
+++ b/frontend/src/components/FileTable.tsx
@@ -767,7 +767,7 @@ const FileTable = forwardRef((props, ref) => {
}
return prev + 1;
});
- queue.remove(fileName)
+ queue.remove(fileName);
} else {
let errorobj = { error: res.data.error, message: res.data.message, fileName };
throw new Error(JSON.stringify(errorobj));
diff --git a/frontend/src/components/Graph/GraphViewModal.tsx b/frontend/src/components/Graph/GraphViewModal.tsx
index 0c8153fe0..39438b788 100644
--- a/frontend/src/components/Graph/GraphViewModal.tsx
+++ b/frontend/src/components/Graph/GraphViewModal.tsx
@@ -97,10 +97,10 @@ const GraphViewModal: React.FunctionComponent = ({
graphType.includes('DocumentChunk') && graphType.includes('Entities')
? queryMap.DocChunkEntities
: graphType.includes('DocumentChunk')
- ? queryMap.DocChunks
- : graphType.includes('Entities')
- ? queryMap.Entities
- : '';
+ ? queryMap.DocChunks
+ : graphType.includes('Entities')
+ ? queryMap.Entities
+ : '';
// fit graph to original position
const handleZoomToFit = () => {
@@ -135,10 +135,10 @@ const GraphViewModal: React.FunctionComponent = ({
const nodeRelationshipData =
viewPoint === graphLabels.showGraphView
? await graphQueryAPI(
- userCredentials as UserCredentials,
- graphQuery,
- selectedRows?.map((f) => f.name)
- )
+ userCredentials as UserCredentials,
+ graphQuery,
+ selectedRows?.map((f) => f.name)
+ )
: await graphQueryAPI(userCredentials as UserCredentials, graphQuery, [inspectedName ?? '']);
return nodeRelationshipData;
} catch (error: any) {
diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx
index 36fcff3d1..f5a021e30 100644
--- a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx
+++ b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx
@@ -80,12 +80,12 @@ export default function DeduplicationTab() {
const onRemove = (nodeid: string, similarNodeId: string) => {
setDuplicateNodes((prev) => {
return prev.map((d) =>
- d.e.elementId === nodeid
+ (d.e.elementId === nodeid
? {
...d,
similar: d.similar.filter((n) => n.elementId != similarNodeId),
}
- : d
+ : d)
);
});
};
From dadaa288c6302e7ba5b00915ba4468dbc568d4d3 Mon Sep 17 00:00:00 2001
From: edenbuaa
Date: Tue, 27 Aug 2024 21:56:35 +0800
Subject: [PATCH 023/292] remove whitespace from environment variables, which
 caused the error "xxx may not contain whitespace" (#707)
---
example.env | 34 +++++++++++++++++-----------------
1 file changed, 17 insertions(+), 17 deletions(-)
diff --git a/example.env b/example.env
index ed33101fb..23bcc6e06 100644
--- a/example.env
+++ b/example.env
@@ -1,27 +1,27 @@
# Mandatory
-OPENAI_API_KEY = ""
-DIFFBOT_API_KEY = ""
+OPENAI_API_KEY=""
+DIFFBOT_API_KEY=""
# Optional Backend
-EMBEDDING_MODEL = "all-MiniLM-L6-v2"
-IS_EMBEDDING = "true"
-KNN_MIN_SCORE = "0.94"
+EMBEDDING_MODEL="all-MiniLM-L6-v2"
+IS_EMBEDDING="true"
+KNN_MIN_SCORE="0.94"
# Enable Gemini (default is False) | Can be False or True
-GEMINI_ENABLED = False
+GEMINI_ENABLED=False
# LLM_MODEL_CONFIG_ollama_llama3="llama3,http://host.docker.internal:11434"
# Enable Google Cloud logs (default is False) | Can be False or True
-GCP_LOG_METRICS_ENABLED = False
-NUMBER_OF_CHUNKS_TO_COMBINE = 6
-UPDATE_GRAPH_CHUNKS_PROCESSED = 20
-NEO4J_URI = "neo4j://database:7687"
-NEO4J_USERNAME = "neo4j"
-NEO4J_PASSWORD = "password"
-LANGCHAIN_API_KEY = ""
-LANGCHAIN_PROJECT = ""
-LANGCHAIN_TRACING_V2 = "true"
-LANGCHAIN_ENDPOINT = "https://api.smith.langchain.com"
-GCS_FILE_CACHE = False
+GCP_LOG_METRICS_ENABLED=False
+NUMBER_OF_CHUNKS_TO_COMBINE=6
+UPDATE_GRAPH_CHUNKS_PROCESSED=20
+NEO4J_URI="neo4j://database:7687"
+NEO4J_USERNAME="neo4j"
+NEO4J_PASSWORD="password"
+LANGCHAIN_API_KEY=""
+LANGCHAIN_PROJECT=""
+LANGCHAIN_TRACING_V2="true"
+LANGCHAIN_ENDPOINT="https://api.smith.langchain.com"
+GCS_FILE_CACHE=False
ENTITY_EMBEDDING=True
# Optional Frontend
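The `.env` edit above only removes the spaces around `=`: parsers in the docker-compose/env-file family reject entries like `OPENAI_API_KEY = ""` with "xxx may not contain whitespace". A naive, illustrative checker for that failure mode (not part of the repository):

```python
# Illustrative lint for .env files: flag whitespace around '=' that
# docker-compose style parsers reject with "xxx may not contain whitespace".
import re
import sys

def check_env_file(path):
    bad = []
    with open(path) as fh:
        for lineno, line in enumerate(fh, start=1):
            stripped = line.strip()
            if not stripped or stripped.startswith('#'):
                continue  # skip blank lines and comments
            # naive check: any whitespace directly before or after '='
            if re.search(r'\s=|=\s', stripped):
                bad.append((lineno, stripped))
    return bad

if __name__ == '__main__':
    for lineno, entry in check_env_file(sys.argv[1]):
        print(f"line {lineno}: whitespace around '=' -> {entry}")
```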
From 4c6f676190a79159577112b3cd284c306d9ad146 Mon Sep 17 00:00:00 2001
From: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com>
Date: Tue, 27 Aug 2024 15:06:45 +0000
Subject: [PATCH 024/292] updated disconnected nodes
---
backend/Performance_test.py | 1 +
backend/src/main.py | 1 +
backend/test_integrationqa.py | 122 ++++++++++++++++++++++++++--------
3 files changed, 96 insertions(+), 28 deletions(-)
diff --git a/backend/Performance_test.py b/backend/Performance_test.py
index fc0aee66f..712d3daf1 100644
--- a/backend/Performance_test.py
+++ b/backend/Performance_test.py
@@ -94,6 +94,7 @@ def performance_main():
for _ in range(CONCURRENT_REQUESTS):
futures.append(executor.submit(post_request_chunk))
+ # Chatbot request futures
# Chatbot request futures
# for message in CHATBOT_MESSAGES:
# futures.append(executor.submit(chatbot_request, message))
diff --git a/backend/src/main.py b/backend/src/main.py
index 16eb0e622..a7d5058a0 100644
--- a/backend/src/main.py
+++ b/backend/src/main.py
@@ -264,6 +264,7 @@ def processing_source(uri, userName, password, database, model, file_name, pages
graphDb_data_Access = graphDBdataAccess(graph)
result = graphDb_data_Access.get_current_status_document_node(file_name)
+ print(result)
logging.info("Break down file into chunks")
bad_chars = ['"', "\n", "'"]
for i in range(0,len(pages)):
diff --git a/backend/test_integrationqa.py b/backend/test_integrationqa.py
index 20f3effb0..6f931058e 100644
--- a/backend/test_integrationqa.py
+++ b/backend/test_integrationqa.py
@@ -69,9 +69,7 @@ def test_graph_from_wikipedia(model_name):
file_name = "Ram_Mandir"
create_source_node_graph_url_wikipedia(graph, model_name, wiki_query, source_type)
- wiki_result = extract_graph_from_file_Wikipedia(
- URI, USERNAME, PASSWORD, DATABASE, model_name, file_name, 1, 'en', '', ''
- )
+ wiki_result = extract_graph_from_file_Wikipedia(URI, USERNAME, PASSWORD, DATABASE, model_name, file_name, 1, 'en', '', '')
logging.info("Wikipedia test done")
print(wiki_result)
@@ -85,6 +83,27 @@ def test_graph_from_wikipedia(model_name):
return wiki_result
+def test_graph_website(model_name):
+ """Test graph creation from a Website page."""
+ #graph, model, source_url, source_type
+ source_url = 'https://www.amazon.com/'
+ source_type = 'web-url'
+ create_source_node_graph_web_url(graph, model_name, source_url, source_type)
+
+ weburl_result = extract_graph_from_web_page(URI, USERNAME, PASSWORD, DATABASE, model_name, source_url, '', '')
+ logging.info("WebUrl test done")
+ print(weburl_result)
+
+ try:
+ assert weburl_result['status'] == 'Completed'
+ assert weburl_result['nodeCount'] > 0
+ assert weburl_result['relationshipCount'] > 0
+ print("Success")
+ except AssertionError as e:
+ print("Fail: ", e)
+ return weburl_result
+
+
def test_graph_from_youtube_video(model_name):
"""Test graph creation from a YouTube video."""
source_url = 'https://www.youtube.com/watch?v=T-qy-zPWgqA'
@@ -115,52 +134,99 @@ def test_chatbot_qna(model_name, mode='graph+vector'):
try:
assert len(QA_n_RAG['message']) > 20
+ return QA_n_RAG
+ print("Success")
+ except AssertionError as e:
+ print("Failed ", e)
+ return QA_n_RAG
+
+# Check the Functionality of Chatbot QnA for mode 'vector'
+def test_chatbot_QnA_vector(model_name):
+ model = model_name
+ QA_n_RAG_vector = QA_RAG(graph, model, 'Tell me about amazon', '[]', 1, 'vector')
+
+
+ print(QA_n_RAG_vector)
+ print(len(QA_n_RAG_vector['message']))
+ try:
+ assert len(QA_n_RAG_vector['message']) > 20
+ return QA_n_RAG_vector
print("Success")
except AssertionError as e:
print("Failed: ", e)
return QA_n_RAG
-def compare_graph_results(results):
- """
- Compare graph results across different models.
- Add custom logic here to compare graph data, nodes, and relationships.
- """
- # Placeholder logic for comparison
- print("Comparing results...")
- for i in range(len(results) - 1):
- result_a = results[i]
- result_b = results[i + 1]
- if result_a == result_b:
- print(f"Result {i} is identical to result {i+1}")
+#Get disconnected_nodes list
+def disconected_nodes():
+ #graph = create_graph_database_connection(uri, userName, password, database)
+ graphDb_data_Access = graphDBdataAccess(graph)
+ nodes_list, total_nodes = graphDb_data_Access.list_unconnected_nodes()
+ if total_nodes['total']>0:
+ return "True"
+ else:
+ return "False"
+
+#Delete disconnected_nodes list
+# def delete_disconected_nodes():
+# #graph = create_graph_database_connection(uri, userName, password, database)
+# graphDb_data_Access = graphDBdataAccess(graph)
+# result = graphDb_data_Access.delete_unconnected_nodes(unconnected_entities_list)
+
+#Get Duplicate_nodes
+def get_duplicate_nodes():
+ #graph = create_graph_database_connection(uri, userName, password, database)
+ graphDb_data_Access = graphDBdataAccess(graph)
+ nodes_list, total_nodes = graphDb_data_Access.get_duplicate_nodes_list()
+ if total_nodes['total']>0:
+ return "True"
else:
- print(f"Result {i} differs from result {i+1}")
+ return "False"
+ print(nodes_list)
+ print(total_nodes)
+
+# def compare_graph_results(results):
+# """
+# Compare graph results across different models.
+# Add custom logic here to compare graph data, nodes, and relationships.
+# """
+# # Placeholder logic for comparison
+# print("Comparing results...")
+# for i in range(len(results) - 1):
+# result_a = results[i]
+# result_b = results[i + 1]
+# if result_a == result_b:
+# print(f"Result {i} is identical to result {i+1}")
+# else:
+# print(f"Result {i} differs from result {i+1}")
def run_tests():
final_list = []
error_list = []
- models = [
- 'openai-gpt-3.5', 'openai-gpt-4o', 'openai-gpt-4o-mini', 'azure_ai_gpt_35',
- 'azure_ai_gpt_4o', 'anthropic_claude_3_5_sonnet', 'fireworks_v3p1_405b',
- 'fireworks_llama_v3_70b', 'ollama_llama3', 'bedrock_claude_3_5_sonnet'
- ]
+ models = ['openai-gpt-3.5', 'openai-gpt-4o']
for model_name in models:
try:
- final_list.append(test_graph_from_file_local(model_name))
+ # final_list.append(test_graph_from_file_local(model_name))
final_list.append(test_graph_from_wikipedia(model_name))
- final_list.append(test_graph_from_youtube_video(model_name))
- final_list.append(test_chatbot_qna(model_name))
- final_list.append(test_chatbot_qna(model_name, mode='vector'))
- final_list.append(test_chatbot_qna(model_name, mode='hybrid'))
+ # final_list.append(test_graph_website(model_name))
+ # final_list.append(test_graph_from_youtube_video(model_name))
+ # final_list.append(test_chatbot_qna(model_name))
+ # final_list.append(test_chatbot_qna(model_name, mode='vector'))
+ # final_list.append(test_chatbot_qna(model_name, mode='hybrid'))
except Exception as e:
error_list.append((model_name, str(e)))
#Compare and log diffrences in graph results
- compare_graph_results(final_list) # Pass the final_list to comapre_graph_results
-
+ # compare_graph_results(final_list) # Pass the final_list to comapre_graph_results
+
+ dis = disconected_nodes()
+ dup = get_duplicate_nodes()
# Save final results to CSV
df = pd.DataFrame(final_list)
+ print(df)
df['execution_date'] = dt.today().strftime('%Y-%m-%d')
+ df['disconnected_nodes']=dis
+ df['get_duplicate_nodes']=dup
df.to_csv(f"Integration_TestResult_{dt.now().strftime('%Y%m%d_%H%M%S')}.csv", index=False)
# Save error details to CSV
From 568db51c733e5b9cc140ea0d6aeaf0d63b842f52 Mon Sep 17 00:00:00 2001
From: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com>
Date: Tue, 27 Aug 2024 15:11:14 +0000
Subject: [PATCH 025/292] updated disconnected nodes
---
backend/test_integrationqa.py | 31 ++++++-------------------------
1 file changed, 6 insertions(+), 25 deletions(-)
diff --git a/backend/test_integrationqa.py b/backend/test_integrationqa.py
index 6f931058e..b2222ebea 100644
--- a/backend/test_integrationqa.py
+++ b/backend/test_integrationqa.py
@@ -126,7 +126,7 @@ def test_graph_from_youtube_video(model_name):
return youtube_result
-def test_chatbot_qna(model_name, mode='graph+vector'):
+def test_chatbot_qna(model_name, mode='vector'):
"""Test chatbot QnA functionality for different modes."""
QA_n_RAG = QA_RAG(graph, model_name, 'Tell me about amazon', '[]', 1, mode)
print(QA_n_RAG)
@@ -139,24 +139,7 @@ def test_chatbot_qna(model_name, mode='graph+vector'):
except AssertionError as e:
print("Failed ", e)
return QA_n_RAG
-
-# Check the Functionality of Chatbot QnA for mode 'vector'
-def test_chatbot_QnA_vector(model_name):
- model = model_name
- QA_n_RAG_vector = QA_RAG(graph, model, 'Tell me about amazon', '[]', 1, 'vector')
-
-
- print(QA_n_RAG_vector)
- print(len(QA_n_RAG_vector['message']))
- try:
- assert len(QA_n_RAG_vector['message']) > 20
- return QA_n_RAG_vector
- print("Success")
- except AssertionError as e:
- print("Failed: ", e)
-
- return QA_n_RAG
-
+
#Get disconnected_nodes list
def disconected_nodes():
#graph = create_graph_database_connection(uri, userName, password, database)
@@ -182,8 +165,6 @@ def get_duplicate_nodes():
return "True"
else:
return "False"
- print(nodes_list)
- print(total_nodes)
# def compare_graph_results(results):
# """
@@ -208,12 +189,12 @@ def run_tests():
for model_name in models:
try:
# final_list.append(test_graph_from_file_local(model_name))
- final_list.append(test_graph_from_wikipedia(model_name))
- # final_list.append(test_graph_website(model_name))
+ # final_list.append(test_graph_from_wikipedia(model_name))
+ final_list.append(test_graph_website(model_name))
# final_list.append(test_graph_from_youtube_video(model_name))
- # final_list.append(test_chatbot_qna(model_name))
+ final_list.append(test_chatbot_qna(model_name))
# final_list.append(test_chatbot_qna(model_name, mode='vector'))
- # final_list.append(test_chatbot_qna(model_name, mode='hybrid'))
+ # final_list.append(test_chatbot_qna(model_name, mode='graph+vector+fulltext'))
except Exception as e:
error_list.append((model_name, str(e)))
#Compare and log diffrences in graph results
From 501ec6b741622282bd13c37d0713a2dfaccda78a Mon Sep 17 00:00:00 2001
From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
Date: Wed, 28 Aug 2024 05:02:20 +0000
Subject: [PATCH 026/292] fix: Processed count update on failed condition
---
frontend/src/components/Content.tsx | 66 +++++++++++++++++++----------
frontend/src/hooks/useSse.tsx | 3 +-
2 files changed, 44 insertions(+), 25 deletions(-)
diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx
index 64945d5d5..039c04106 100644
--- a/frontend/src/components/Content.tsx
+++ b/frontend/src/components/Content.tsx
@@ -31,6 +31,8 @@ import FallBackDialog from './UI/FallBackDialog';
import DeletePopUp from './Popups/DeletePopUp/DeletePopUp';
import GraphEnhancementDialog from './Popups/GraphEnhancementDialog';
import { tokens } from '@neo4j-ndl/base';
+import axios from 'axios';
+
const ConnectionModal = lazy(() => import('./Popups/ConnectionModal/ConnectionModal'));
const ConfirmationDialog = lazy(() => import('./Popups/LargeFilePopUp/ConfirmationDialog'));
let afterFirstRender = false;
@@ -180,6 +182,7 @@ const Content: React.FC = ({
};
const extractHandler = async (fileItem: CustomFile, uid: string) => {
+ queue.remove(fileItem.name as string);
try {
setFilesData((prevfiles) =>
prevfiles.map((curfile) => {
@@ -252,28 +255,45 @@ const Content: React.FC = ({
});
}
} catch (err: any) {
- const error = JSON.parse(err.message);
- if (Object.keys(error).includes('fileName')) {
- const { message } = error;
- const { fileName } = error;
- const errorMessage = error.message;
- setalertDetails({
- showAlert: true,
- alertType: 'error',
- alertMessage: message,
- });
- setFilesData((prevfiles) =>
- prevfiles.map((curfile) => {
- if (curfile.name == fileName) {
- return {
- ...curfile,
- status: 'Failed',
- errorMessage,
- };
- }
- return curfile;
- })
- );
+ if (err instanceof Error) {
+ try {
+ const error = JSON.parse(err.message);
+ if (Object.keys(error).includes('fileName')) {
+ setProcessedCount((prev) => {
+ if (prev == batchSize) {
+ return batchSize - 1;
+ }
+ return prev + 1;
+ });
+ const { message, fileName } = error;
+ queue.remove(fileName);
+ const errorMessage = error.message;
+ setalertDetails({
+ showAlert: true,
+ alertType: 'error',
+ alertMessage: message,
+ });
+ setFilesData((prevfiles) =>
+ prevfiles.map((curfile) => {
+ if (curfile.name == fileName) {
+ return { ...curfile, status: 'Failed', errorMessage };
+ }
+ return curfile;
+ })
+ );
+ } else {
+ console.error('Unexpected error format:', error);
+ }
+ } catch (parseError) {
+ if (axios.isAxiosError(err)) {
+ const axiosErrorMessage = err.response?.data?.message || err.message;
+ console.error('Axios error occurred:', axiosErrorMessage);
+ } else {
+ console.error('An unexpected error occurred:', err.message);
+ }
+ }
+ } else {
+ console.error('An unknown error occurred:', err);
}
}
};
@@ -839,4 +859,4 @@ const Content: React.FC = ({
);
};
-export default Content;
+export default Content;
\ No newline at end of file
diff --git a/frontend/src/hooks/useSse.tsx b/frontend/src/hooks/useSse.tsx
index 36be24c7b..8b063751c 100644
--- a/frontend/src/hooks/useSse.tsx
+++ b/frontend/src/hooks/useSse.tsx
@@ -7,7 +7,7 @@ export default function useServerSideEvent(
alertHandler: (inMinutes: boolean, minutes: number, filename: string) => void,
errorHandler: (filename: string) => void
) {
- const { setFilesData, setProcessedCount, queue } = useFileContext();
+ const { setFilesData, setProcessedCount } = useFileContext();
function updateStatusForLargeFiles(eventSourceRes: eventResponsetypes) {
const {
fileName,
@@ -67,7 +67,6 @@ export default function useServerSideEvent(
}
return prev + 1;
});
- queue.remove(fileName);
} else if (eventSourceRes.status === 'Failed') {
setFilesData((prevfiles) => {
return prevfiles.map((curfile) => {
From 9941474aa0f213bd0afebcece2a70e4b8d275e0c Mon Sep 17 00:00:00 2001
From: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com>
Date: Wed, 28 Aug 2024 11:23:03 +0000
Subject: [PATCH 027/292] added disconnected and duplicate nodes
---
backend/test_integrationqa.py | 42 ++++++++++++++++++++++++-----------
1 file changed, 29 insertions(+), 13 deletions(-)
diff --git a/backend/test_integrationqa.py b/backend/test_integrationqa.py
index b2222ebea..4d0a5e1ea 100644
--- a/backend/test_integrationqa.py
+++ b/backend/test_integrationqa.py
@@ -1,3 +1,4 @@
+import json
import os
import shutil
import logging
@@ -145,16 +146,24 @@ def disconected_nodes():
#graph = create_graph_database_connection(uri, userName, password, database)
graphDb_data_Access = graphDBdataAccess(graph)
nodes_list, total_nodes = graphDb_data_Access.list_unconnected_nodes()
+ print(nodes_list[0]["e"]["elementId"])
+
+ status = "False"
+
if total_nodes['total']>0:
- return "True"
+ status = "True"
else:
- return "False"
+ status = "False"
+
+ return nodes_list[0]["e"]["elementId"], status
#Delete disconnected_nodes list
-# def delete_disconected_nodes():
-# #graph = create_graph_database_connection(uri, userName, password, database)
-# graphDb_data_Access = graphDBdataAccess(graph)
-# result = graphDb_data_Access.delete_unconnected_nodes(unconnected_entities_list)
+def delete_disconected_nodes(lst_element_id):
+ print(f'disconnect elementid list {lst_element_id}')
+ #graph = create_graph_database_connection(uri, userName, password, database)
+ graphDb_data_Access = graphDBdataAccess(graph)
+ result = graphDb_data_Access.delete_unconnected_nodes(json.dumps(lst_element_id))
+ print(f'delete disconnect api result {result}')
#Get Duplicate_nodes
def get_duplicate_nodes():
@@ -166,6 +175,11 @@ def get_duplicate_nodes():
else:
return "False"
+#Merge Duplicate_nodes
+def test_merge_duplicate_nodes():
+ graphDb_data_Access = graphDBdataAccess(graph)
+ result = graphDb_data_Access.merge_duplicate_nodes(duplicate_nodes_list)
+
# def compare_graph_results(results):
# """
# Compare graph results across different models.
@@ -188,25 +202,27 @@ def run_tests():
for model_name in models:
try:
- # final_list.append(test_graph_from_file_local(model_name))
- # final_list.append(test_graph_from_wikipedia(model_name))
+ final_list.append(test_graph_from_file_local(model_name))
+ final_list.append(test_graph_from_wikipedia(model_name))
final_list.append(test_graph_website(model_name))
- # final_list.append(test_graph_from_youtube_video(model_name))
+ final_list.append(test_graph_from_youtube_video(model_name))
final_list.append(test_chatbot_qna(model_name))
- # final_list.append(test_chatbot_qna(model_name, mode='vector'))
- # final_list.append(test_chatbot_qna(model_name, mode='graph+vector+fulltext'))
+ final_list.append(test_chatbot_qna(model_name, mode='vector'))
+ final_list.append(test_chatbot_qna(model_name, mode='graph+vector+fulltext'))
except Exception as e:
error_list.append((model_name, str(e)))
#Compare and log diffrences in graph results
# compare_graph_results(final_list) # Pass the final_list to comapre_graph_results
- dis = disconected_nodes()
+ dis_elementid, dis_status = disconected_nodes()
+ lst_element_id = [dis_elementid]
+ delete_disconected_nodes(lst_element_id)
dup = get_duplicate_nodes()
# Save final results to CSV
df = pd.DataFrame(final_list)
print(df)
df['execution_date'] = dt.today().strftime('%Y-%m-%d')
- df['disconnected_nodes']=dis
+ df['disconnected_nodes']=dis_status
df['get_duplicate_nodes']=dup
df.to_csv(f"Integration_TestResult_{dt.now().strftime('%Y%m%d_%H%M%S')}.csv", index=False)
From 8ae3b99d72edd456536e1a2b67a1f5d5f1b7d188 Mon Sep 17 00:00:00 2001
From: vasanthasaikalluri
<165021735+vasanthasaikalluri@users.noreply.github.com>
Date: Wed, 28 Aug 2024 17:15:49 +0000
Subject: [PATCH 028/292] removed __Entity__ labels
---
backend/src/communities.py | 85 ++++++++++++++++++++++-----------
backend/src/shared/common_fn.py | 4 +-
2 files changed, 58 insertions(+), 31 deletions(-)
diff --git a/backend/src/communities.py b/backend/src/communities.py
index b795ae4ed..e493a01dc 100644
--- a/backend/src/communities.py
+++ b/backend/src/communities.py
@@ -8,13 +8,37 @@
from tqdm import tqdm
-COMMUNITY_PROJECT_NAME = "communities"
-NODE_PROJECTION = "__Entity__"
+COMMUNITY_PROJECTION_NAME = "communities"
+NODE_PROJECTION = "!Chunk&!Document&!__Community__"
MAX_WORKERS = 10
+
+CREATE_COMMUNITY_GRAPH_PROJECTION = """
+MATCH (source:{node_projection})
+OPTIONAL MATCH (source)-[]->(target:{node_projection})
+WITH source, target, count(*) AS weight
+WITH gds.graph.project(
+ {project_name},
+ source,
+ target,
+ {{
+ relationshipProperties: {{ weight: weight }}
+ }},
+ {{
+ undirectedRelationshipTypes: ['*']
+ }}
+) AS g
+RETURN
+ g.graphName AS graph_name,
+ g.nodeCount AS nodes,
+ g.relationshipCount AS rels
+"""
+
+
CREATE_COMMUNITY_CONSTRAINT = "CREATE CONSTRAINT IF NOT EXISTS FOR (c:__Community__) REQUIRE c.id IS UNIQUE;"
CREATE_COMMUNITY_LEVELS = """
-MATCH (e:`__Entity__`)
+MATCH (e:!Chunk&!Document&!__Community__)
+WHERE e.communities is NOT NULL
UNWIND range(0, size(e.communities) - 1 , 1) AS index
CALL {
WITH e, index
@@ -39,7 +63,7 @@
RETURN count(*)
"""
CREATE_COMMUNITY_RANKS = """
-MATCH (c:__Community__)<-[:IN_COMMUNITY*]-(:__Entity__)<-[:MENTIONS]-(d:Document)
+MATCH (c:__Community__)<-[:IN_COMMUNITY*]-(:!Chunk&!Document&!__Community__)<-[:MENTIONS]-(d:Document)
WITH c, count(distinct d) AS rank
SET c.community_rank = rank;
"""
@@ -50,8 +74,7 @@
SET n.weight = chunkCount"""
GET_COMMUNITY_INFO = """
-MATCH (c:`__Community__`)<-[:IN_COMMUNITY*]-(e:__Entity__)
-WHERE c.level IN [0,1,4]
+MATCH (c:`__Community__`)<-[:IN_COMMUNITY*]-(e:!Chunk&!Document&!__Community__)
WITH c, collect(e ) AS nodes
WHERE size(nodes) > 1
CALL apoc.path.subgraphAll(nodes[0], {
@@ -90,39 +113,43 @@ def get_gds_driver(uri, username, password, database):
logging.error(f"Failed to create GDS driver: {e}")
raise
-def create_community_graph_project(gds, project_name=COMMUNITY_PROJECT_NAME, node_projection=NODE_PROJECTION):
+def create_community_graph_projection(gds, project_name=COMMUNITY_PROJECTION_NAME, node_projection=NODE_PROJECTION):
try:
existing_projects = gds.graph.list()
project_exists = existing_projects["graphName"].str.contains(project_name, regex=False).any()
if project_exists:
- logging.info(f"Project '{project_name}' already exists. Dropping it.")
+ logging.info(f"Projection '{project_name}' already exists. Dropping it.")
gds.graph.drop(project_name)
logging.info(f"Creating new graph project '{project_name}'.")
- graph_project, result = gds.graph.project(
- project_name,
- node_projection,
- {
- "_ALL_": {
- "type": "*",
- "orientation": "UNDIRECTED",
- "properties": {
- "weight": {
- "property": "*",
- "aggregation": "COUNT"
- }
- }
- }
- }
- )
- logging.info(f"Graph project '{project_name}' created successfully.")
- return graph_project, result
+ # graph_project, result = gds.graph.project(
+ # project_name,
+ # node_projection,
+ # {
+ # "_ALL_": {
+ # "type": "*",
+ # "orientation": "UNDIRECTED",
+ # "properties": {
+ # "weight": {
+ # "property": "*",
+ # "aggregation": "COUNT"
+ # }
+ # }
+ # }
+ # }
+ # )
+ projection_query = CREATE_COMMUNITY_GRAPH_PROJECTION.format(node_projection=node_projection,project_name=project_name)
+ graph_projection_result = gds.run_cypher(projection_query)
+ projection_result = graph_projection_result.to_dict(orient="records")[0]
+ logging.info(f"Graph projection '{projection_result['graph_name']}' created successfully with {projection_result['nodes']} nodes and {projection_result['rels']} relationships.")
+ graph_project = gds.graph.get(projection_result['graph_name'])
+ return graph_project
except Exception as e:
logging.error(f"Failed to create community graph project: {e}")
raise
-def write_communities(gds, graph_project, project_name=COMMUNITY_PROJECT_NAME):
+def write_communities(gds, graph_project, project_name=COMMUNITY_PROJECTION_NAME):
try:
logging.info(f"Writing communities to the graph project '{project_name}'.")
gds.leiden.write(
@@ -231,10 +258,10 @@ def create_community_properties(graph, model):
logging.error(f"Failed to create community properties: {e}")
raise
-def create_communities(uri, username, password, database,graph,model):
+def create_communities(uri, username, password, database,model):
try:
gds = get_gds_driver(uri, username, password, database)
- graph_project, result = create_community_graph_project(gds)
+ graph_project = create_community_graph_projection(gds)
write_communities_sucess = write_communities(gds, graph_project)
if write_communities_sucess:
logging.info("Applying community constraint to the graph.")
diff --git a/backend/src/shared/common_fn.py b/backend/src/shared/common_fn.py
index 6d24912c7..2037682e7 100644
--- a/backend/src/shared/common_fn.py
+++ b/backend/src/shared/common_fn.py
@@ -94,8 +94,8 @@ def load_embedding_model(embedding_model_name: str):
return embeddings, dimension
def save_graphDocuments_in_neo4j(graph:Neo4jGraph, graph_document_list:List[GraphDocument]):
- graph.add_graph_documents(graph_document_list, baseEntityLabel=True,include_source=True)
- # graph.add_graph_documents(graph_document_list)
+ # graph.add_graph_documents(graph_document_list, baseEntityLabel=True,include_source=True)
+ graph.add_graph_documents(graph_document_list)
def handle_backticks_nodes_relationship_id_type(graph_document_list:List[GraphDocument]):
for graph_document in graph_document_list:
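Patch 028 replaces the native `gds.graph.project(...)` call with a Cypher projection so the node filter `!Chunk&!Document&!__Community__` can be applied directly. A sketch of driving that query through the `graphdatascience` client; it assumes the client's `run_cypher`, `graph.exists`, `graph.get`, and `graph.drop` helpers, and uses an existence check where the patch scans `gds.graph.list()`:

```python
# Sketch: run the Cypher-based projection from the patch through the
# graphdatascience client and fetch a handle to the in-memory graph.
from graphdatascience import GraphDataScience

PROJECTION = """
MATCH (source:{node_projection})
OPTIONAL MATCH (source)-[]->(target:{node_projection})
WITH source, target, count(*) AS weight
WITH gds.graph.project(
  '{project_name}', source, target,
  {{ relationshipProperties: {{ weight: weight }} }},
  {{ undirectedRelationshipTypes: ['*'] }}
) AS g
RETURN g.graphName AS graph_name, g.nodeCount AS nodes, g.relationshipCount AS rels
"""

def project_communities(gds: GraphDataScience, name="communities",
                        node_projection="!Chunk&!Document&!__Community__"):
    if gds.graph.exists(name)["exists"]:   # drop a stale projection first
        gds.graph.drop(gds.graph.get(name))
    result = gds.run_cypher(PROJECTION.format(project_name=name,
                                              node_projection=node_projection))
    row = result.to_dict(orient="records")[0]
    print(f"projected {row['nodes']} nodes / {row['rels']} rels")
    return gds.graph.get(row["graph_name"])
```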
From 450ba6fd33ccf4d05a895a97a9df4228dde2816d Mon Sep 17 00:00:00 2001
From: vasanthasaikalluri
<165021735+vasanthasaikalluri@users.noreply.github.com>
Date: Wed, 28 Aug 2024 17:56:57 +0000
Subject: [PATCH 029/292] removed graph_object
---
backend/src/communities.py | 21 ++++++++++-----------
1 file changed, 10 insertions(+), 11 deletions(-)
diff --git a/backend/src/communities.py b/backend/src/communities.py
index e493a01dc..cfbb1cc21 100644
--- a/backend/src/communities.py
+++ b/backend/src/communities.py
@@ -99,7 +99,6 @@
Summary:"""
-
def get_gds_driver(uri, username, password, database):
try:
gds = GraphDataScience(
@@ -216,9 +215,9 @@ def process_community(community, community_chain):
logging.error(f"Failed to process community {community.get('communityId', 'unknown')}: {e}")
raise
-def create_community_summaries(graph, model):
+def create_community_summaries(gds, model):
try:
- community_info_list = graph.query(GET_COMMUNITY_INFO)
+ community_info_list = gds.run_cypher(GET_COMMUNITY_INFO)
community_chain = get_community_chain(model)
summaries = []
@@ -231,28 +230,28 @@ def create_community_summaries(graph, model):
except Exception as e:
logging.error(f"Failed to retrieve result for a community: {e}")
- graph.query(STORE_COMMUNITY_SUMMARIES, params={"data": summaries})
+ gds.run_cypher(STORE_COMMUNITY_SUMMARIES, params={"data": summaries})
except Exception as e:
logging.error(f"Failed to create community summaries: {e}")
raise
-def create_community_properties(graph, model):
+def create_community_properties(gds, model):
try:
# Create community levels
- graph.query(CREATE_COMMUNITY_LEVELS)
+ gds.run_cypher(CREATE_COMMUNITY_LEVELS)
logging.info("Successfully created community levels.")
# Create community ranks
- graph.query(CREATE_COMMUNITY_RANKS)
+ gds.run_cypher(CREATE_COMMUNITY_RANKS)
logging.info("Successfully created community ranks.")
# Create community weights
- graph.query(CREATE_COMMUNITY_WEIGHTS)
+ gds.run_cypher(CREATE_COMMUNITY_WEIGHTS)
logging.info("Successfully created community weights.")
# Create community summaries
- create_community_summaries(graph, model)
+ create_community_summaries(gds, model)
logging.info("Successfully created community summaries.")
except Exception as e:
logging.error(f"Failed to create community properties: {e}")
@@ -265,8 +264,8 @@ def create_communities(uri, username, password, database,model):
write_communities_sucess = write_communities(gds, graph_project)
if write_communities_sucess:
logging.info("Applying community constraint to the graph.")
- graph.query(CREATE_COMMUNITY_CONSTRAINT)
- create_community_properties(graph,model)
+ gds.run_cypher(CREATE_COMMUNITY_CONSTRAINT)
+ create_community_properties(gds,model)
logging.info("Communities creation process completed successfully.")
else:
logging.warning("Failed to write communities. Constraint was not applied.")
From 266c8121883b29445ba0020f72038d1a99d0d30c Mon Sep 17 00:00:00 2001
From: vasanthasaikalluri
<165021735+vasanthasaikalluri@users.noreply.github.com>
Date: Wed, 28 Aug 2024 18:02:49 +0000
Subject: [PATCH 030/292] removed graph object in the function
---
backend/score.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/backend/score.py b/backend/score.py
index 717221520..7b06def44 100644
--- a/backend/score.py
+++ b/backend/score.py
@@ -244,7 +244,7 @@ async def post_processing(uri=Form(), userName=Form(), password=Form(), database
if "create_communities" in tasks:
model = "openai-gpt-4o"
- await asyncio.to_thread(create_communities, uri, userName, password, database,graph,model)
+ await asyncio.to_thread(create_communities, uri, userName, password, database,model)
josn_obj = {'api_name': 'post_processing/create_communities', 'db_url': uri, 'logging_time': formatted_time(datetime.now(timezone.utc))}
logger.log_struct(josn_obj)
logging.info(f'created communities')
From cac1963d473ca9aa7c758336f1baefc465a4bd6e Mon Sep 17 00:00:00 2001
From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
Date: Thu, 29 Aug 2024 07:35:41 +0000
Subject: [PATCH 031/292] resetting the alert message on the success scenario
---
.../src/components/Popups/ConnectionModal/ConnectionModal.tsx | 3 +++
1 file changed, 3 insertions(+)
diff --git a/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx b/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx
index 63e2d7883..a7b3fcb1f 100644
--- a/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx
+++ b/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx
@@ -266,6 +266,9 @@ export default function ConnectionModal({
}
}
setTimeout(() => {
+ if(connectionMessage?.type!="danger"){
+ setMessage({ type: 'unknown', content: '' })
+ }
setPassword('');
}, 3000);
};
From 43ec5691ff1729fc8936e5345bcc78ab2fadb8e0 Mon Sep 17 00:00:00 2001
From: vasanthasaikalluri
<165021735+vasanthasaikalluri@users.noreply.github.com>
Date: Thu, 29 Aug 2024 13:03:06 +0000
Subject: [PATCH 032/292] Modified queries
---
backend/src/communities.py | 39 ++++++++++++++++------------------
backend/src/post_processing.py | 4 ++--
2 files changed, 20 insertions(+), 23 deletions(-)
diff --git a/backend/src/communities.py b/backend/src/communities.py
index cfbb1cc21..dbcb73ebc 100644
--- a/backend/src/communities.py
+++ b/backend/src/communities.py
@@ -16,25 +16,20 @@
CREATE_COMMUNITY_GRAPH_PROJECTION = """
MATCH (source:{node_projection})
OPTIONAL MATCH (source)-[]->(target:{node_projection})
-WITH source, target, count(*) AS weight
+WITH source, target, count(*) as weight
WITH gds.graph.project(
- {project_name},
- source,
- target,
- {{
- relationshipProperties: {{ weight: weight }}
- }},
- {{
- undirectedRelationshipTypes: ['*']
- }}
-) AS g
+ '{project_name}',
+ source,
+ target,
+ {{
+ relationshipProperties: {{ weight: weight }}
+ }},
+ {{undirectedRelationshipTypes: ['*']}}
+ ) AS g
RETURN
- g.graphName AS graph_name,
- g.nodeCount AS nodes,
- g.relationshipCount AS rels
+ g.graphName AS graph_name, g.nodeCount AS nodes, g.relationshipCount AS rels
"""
-
CREATE_COMMUNITY_CONSTRAINT = "CREATE CONSTRAINT IF NOT EXISTS FOR (c:__Community__) REQUIRE c.id IS UNIQUE;"
CREATE_COMMUNITY_LEVELS = """
MATCH (e:!Chunk&!Document&!__Community__)
@@ -63,7 +58,7 @@
RETURN count(*)
"""
CREATE_COMMUNITY_RANKS = """
-MATCH (c:__Community__)<-[:IN_COMMUNITY*]-(:!Chunk&!Document&!__Community__)<-[:MENTIONS]-(d:Document)
+MATCH (c:__Community__)<-[:IN_COMMUNITY*]-(:!Chunk&!Document&!__Community__)<-[HAS_ENTITY]-(:Chunk)<-[]-(d:Document)
WITH c, count(distinct d) AS rank
SET c.community_rank = rank;
"""
@@ -74,8 +69,8 @@
SET n.weight = chunkCount"""
GET_COMMUNITY_INFO = """
-MATCH (c:`__Community__`)<-[:IN_COMMUNITY*]-(e:!Chunk&!Document&!__Community__)
-WITH c, collect(e ) AS nodes
+MATCH (c:`__Community__`)<-[:IN_COMMUNITY]-(e)
+WITH c, collect(e) AS nodes
WHERE size(nodes) > 1
CALL apoc.path.subgraphAll(nodes[0], {
whitelistNodes:nodes
@@ -83,7 +78,7 @@
YIELD relationships
RETURN c.id AS communityId,
[n in nodes | {id: n.id, description: n.description, type: [el in labels(n) WHERE el <> '__Entity__'][0]}] AS nodes,
- [r in relationships | {start: startNode(r).id, type: type(r), end: endNode(r).id, description: r.description}] AS rels
+ [r in relationships | {start: startNode(r).id, type: type(r), end: endNode(r).id}] AS rels
"""
STORE_COMMUNITY_SUMMARIES = """
@@ -200,7 +195,6 @@ def prepare_string(community_data):
relationship_type = rel['type']
relationship_description = f", description: {rel['description']}" if 'description' in rel and rel['description'] else ""
relationships_description += f"({start_node})-[:{relationship_type}]->({end_node}){relationship_description}\n"
-
return nodes_description + "\n" + relationships_description
except Exception as e:
logging.error(f"Failed to prepare string from community data: {e}")
@@ -221,8 +215,11 @@ def create_community_summaries(gds, model):
community_chain = get_community_chain(model)
summaries = []
+ futures = []
with ThreadPoolExecutor() as executor:
- futures = {executor.submit(process_community, community, community_chain): community for community in community_info_list}
+ for _,community in community_info_list.iterrows():
+ future = executor.submit(process_community, community, community_chain)
+ futures.append(future)
for future in as_completed(futures):
try:
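The executor change above is needed because `gds.run_cypher` returns a pandas DataFrame, so communities must be iterated with `iterrows()` rather than treated as a list. A minimal sketch of the resulting fan-out pattern, with `process_community` as a stand-in callable:

```python
# Sketch: fan out one task per DataFrame row, collecting results as they finish.
from concurrent.futures import ThreadPoolExecutor, as_completed
import logging
import pandas as pd

def summarize_communities(community_df: pd.DataFrame, process_community):
    summaries = []
    futures = []
    with ThreadPoolExecutor(max_workers=10) as executor:
        for _, community in community_df.iterrows():  # DataFrame rows, not list items
            futures.append(executor.submit(process_community, community))
        for future in as_completed(futures):
            try:
                summaries.append(future.result())
            except Exception as e:
                logging.error(f"Failed to retrieve result for a community: {e}")
    return summaries
```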
diff --git a/backend/src/post_processing.py b/backend/src/post_processing.py
index fa582e107..7d038c61b 100644
--- a/backend/src/post_processing.py
+++ b/backend/src/post_processing.py
@@ -8,7 +8,7 @@
DROP_INDEX_QUERY = "DROP INDEX entities IF EXISTS;"
LABELS_QUERY = "CALL db.labels()"
FULL_TEXT_QUERY = "CREATE FULLTEXT INDEX entities FOR (n{labels_str}) ON EACH [n.id, n.description];"
-FILTER_LABELS = ["Chunk","Document"]
+FILTER_LABELS = ["Chunk","Document","__Community__"]
HYBRID_SEARCH_INDEX_DROP_QUERY = "DROP INDEX keyword IF EXISTS;"
@@ -80,7 +80,7 @@ def create_entity_embedding(graph:Neo4jGraph):
def fetch_entities_for_embedding(graph):
query = """
MATCH (e)
- WHERE NOT (e:Chunk OR e:Document) AND e.embedding IS NULL AND e.id IS NOT NULL
+ WHERE NOT (e:Chunk OR e:Document OR e:`__Community__`) AND e.embedding IS NULL AND e.id IS NOT NULL
RETURN elementId(e) AS elementId, e.id + " " + coalesce(e.description, "") AS text
"""
result = graph.query(query)
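Both queries in this hunk now exclude `__Community__` alongside `Chunk` and `Document`. One plausible way `labels_str` gets assembled for `FULL_TEXT_QUERY` from the `CALL db.labels()` output, shown here as an illustrative sketch rather than the repository's exact code:

```python
# Sketch: build the fulltext index statement while excluding system labels,
# mirroring FILTER_LABELS / FULL_TEXT_QUERY from the patch above.
FILTER_LABELS = ["Chunk", "Document", "__Community__"]
FULL_TEXT_QUERY = "CREATE FULLTEXT INDEX entities FOR (n{labels_str}) ON EACH [n.id, n.description];"

def build_fulltext_query(all_labels):
    labels = [f"`{label}`" for label in all_labels if label not in FILTER_LABELS]
    labels_str = ":" + "|".join(labels)    # e.g. :`Person`|`Organization`
    return FULL_TEXT_QUERY.format(labels_str=labels_str)

print(build_fulltext_query(["Chunk", "Person", "Organization", "__Community__"]))
# CREATE FULLTEXT INDEX entities FOR (n:`Person`|`Organization`) ON EACH [n.id, n.description];
```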
From d010e419292bb94704bb5e2e6b464cc2d2cf40f1 Mon Sep 17 00:00:00 2001
From: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com>
Date: Fri, 30 Aug 2024 11:00:27 +0000
Subject: [PATCH 033/292] populate graph schema
---
backend/test_integrationqa.py | 48 ++++++++++++++++++++---------------
1 file changed, 28 insertions(+), 20 deletions(-)
diff --git a/backend/test_integrationqa.py b/backend/test_integrationqa.py
index 4d0a5e1ea..821cc6b5c 100644
--- a/backend/test_integrationqa.py
+++ b/backend/test_integrationqa.py
@@ -141,13 +141,12 @@ def test_chatbot_qna(model_name, mode='vector'):
print("Failed ", e)
return QA_n_RAG
-#Get disconnected_nodes list
+#Get Test disconnected_nodes list
def disconected_nodes():
#graph = create_graph_database_connection(uri, userName, password, database)
graphDb_data_Access = graphDBdataAccess(graph)
nodes_list, total_nodes = graphDb_data_Access.list_unconnected_nodes()
print(nodes_list[0]["e"]["elementId"])
-
status = "False"
if total_nodes['total']>0:
@@ -157,15 +156,19 @@ def disconected_nodes():
return nodes_list[0]["e"]["elementId"], status
-#Delete disconnected_nodes list
+#Test Delete delete_disconnected_nodes list
def delete_disconected_nodes(lst_element_id):
print(f'disconnect elementid list {lst_element_id}')
#graph = create_graph_database_connection(uri, userName, password, database)
graphDb_data_Access = graphDBdataAccess(graph)
result = graphDb_data_Access.delete_unconnected_nodes(json.dumps(lst_element_id))
print(f'delete disconnect api result {result}')
+ if not result:
+ return "True"
+ else:
+ return "False"
-#Get Duplicate_nodes
+#Test Get Duplicate_nodes
def get_duplicate_nodes():
#graph = create_graph_database_connection(uri, userName, password, database)
graphDb_data_Access = graphDBdataAccess(graph)
@@ -174,11 +177,12 @@ def get_duplicate_nodes():
return "True"
else:
return "False"
-
-#Merge Duplicate_nodes
-def test_merge_duplicate_nodes():
- graphDb_data_Access = graphDBdataAccess(graph)
- result = graphDb_data_Access.merge_duplicate_nodes(duplicate_nodes_list)
+
+#Test populate_graph_schema
+def test_populate_graph_schema_from_text(model):
+ result_schema = populate_graph_schema_from_text('When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble.', model, True)
+ print(result_schema)
+ return result_schema
# def compare_graph_results(results):
# """
@@ -202,28 +206,32 @@ def run_tests():
for model_name in models:
try:
- final_list.append(test_graph_from_file_local(model_name))
- final_list.append(test_graph_from_wikipedia(model_name))
- final_list.append(test_graph_website(model_name))
- final_list.append(test_graph_from_youtube_video(model_name))
- final_list.append(test_chatbot_qna(model_name))
- final_list.append(test_chatbot_qna(model_name, mode='vector'))
- final_list.append(test_chatbot_qna(model_name, mode='graph+vector+fulltext'))
+ final_list.append(test_graph_from_file_local(model_name))
+ final_list.append(test_graph_from_wikipedia(model_name))
+ final_list.append(test_populate_graph_schema_from_text(model_name))
+ final_list.append(test_graph_website(model_name))
+ final_list.append(test_graph_from_youtube_video(model_name))
+ final_list.append(test_chatbot_qna(model_name))
+ final_list.append(test_chatbot_qna(model_name, mode='vector'))
+ final_list.append(test_chatbot_qna(model_name, mode='graph+vector+fulltext'))
except Exception as e:
error_list.append((model_name, str(e)))
- #Compare and log diffrences in graph results
- # compare_graph_results(final_list) # Pass the final_list to comapre_graph_results
-
+ # #Compare and log diffrences in graph results
+ # # compare_graph_results(final_list) # Pass the final_list to comapre_graph_results
+ # test_populate_graph_schema_from_text('openai-gpt-4o')
dis_elementid, dis_status = disconected_nodes()
lst_element_id = [dis_elementid]
- delete_disconected_nodes(lst_element_id)
+ delt = delete_disconected_nodes(lst_element_id)
dup = get_duplicate_nodes()
+ # schma = test_populate_graph_schema_from_text(model)
# Save final results to CSV
df = pd.DataFrame(final_list)
print(df)
df['execution_date'] = dt.today().strftime('%Y-%m-%d')
df['disconnected_nodes']=dis_status
df['get_duplicate_nodes']=dup
+ df['delete_disconected_nodes']=delt
+ # df['test_populate_graph_schema_from_text'] = schma
df.to_csv(f"Integration_TestResult_{dt.now().strftime('%Y%m%d_%H%M%S')}.csv", index=False)
# Save error details to CSV
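After the loop, the runner attaches the graph-health flags as constant columns on the results DataFrame before writing the CSV. A condensed sketch of that aggregation step, with the flag values passed in rather than computed:

```python
# Sketch: aggregate per-model results plus graph-health flags into one CSV.
from datetime import datetime as dt
import pandas as pd

def save_results(final_list, dis_status, delete_status, dup_status):
    df = pd.DataFrame(final_list)
    df['execution_date'] = dt.today().strftime('%Y-%m-%d')
    df['disconnected_nodes'] = dis_status          # same flag repeated on every row
    df['delete_disconected_nodes'] = delete_status
    df['get_duplicate_nodes'] = dup_status
    df.to_csv(f"Integration_TestResult_{dt.now().strftime('%Y%m%d_%H%M%S')}.csv",
              index=False)
```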
From b3a00aca7e9e3aa21edee9c78fec455edf012ef7 Mon Sep 17 00:00:00 2001
From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
Date: Fri, 30 Aug 2024 13:20:01 +0000
Subject: [PATCH 034/292] not clearing the password in the error scenario
---
.../ConnectionModal/ConnectionModal.tsx | 22 ++++++++++---------
1 file changed, 12 insertions(+), 10 deletions(-)
diff --git a/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx b/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx
index a7b3fcb1f..e11509adf 100644
--- a/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx
+++ b/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx
@@ -197,6 +197,16 @@ export default function ConnectionModal({
if (response?.data?.status !== 'Success') {
throw new Error(response.data.error);
} else {
+ localStorage.setItem(
+ 'neo4j.connection',
+ JSON.stringify({
+ uri: connectionURI,
+ user: username,
+ password: password,
+ database: database,
+ userDbVectorIndex,
+ })
+ );
setUserDbVectorIndex(response.data.data.db_vector_dimension);
if (
(response.data.data.application_dimension === response.data.data.db_vector_dimension ||
@@ -228,6 +238,7 @@ export default function ConnectionModal({
/>
),
});
+ return;
} else {
setMessage({
type: 'danger',
@@ -244,17 +255,8 @@ export default function ConnectionModal({
/>
),
});
+ return;
}
- localStorage.setItem(
- 'neo4j.connection',
- JSON.stringify({
- uri: connectionURI,
- user: username,
- password: password,
- database: database,
- userDbVectorIndex,
- })
- );
}
} catch (error) {
setIsLoading(false);
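The fix above reorders the success branch: the connection details are written to localStorage as soon as the connect call succeeds, and the two vector-index-mismatch branches now end in an early return instead of falling through to a save that previously only ran on the happy path (which is also why the password no longer gets cleared in the error scenario). A condensed TypeScript sketch of the reordered control flow, with the connect/mismatch/connected callbacks as hypothetical helpers rather than the component's real functions:

// Sketch: persist first, then branch; every branch may bail out early.
type Creds = { uri: string; user: string; password: string; database: string };
type ConnectResponse = { status: string; error?: string; dbDim: number; appDim: number };

async function submitConnection(
  creds: Creds,
  connect: (c: Creds) => Promise<ConnectResponse>,
  onMismatch: (r: ConnectResponse) => void,
  onConnected: () => void
): Promise<void> {
  const response = await connect(creds);
  if (response.status !== 'Success') {
    throw new Error(response.error);
  }
  // Persist up front: the mismatch branch below can bail out early
  // without losing the session (previously the save only ran after it).
  localStorage.setItem('neo4j.connection', JSON.stringify(creds));
  if (response.dbDim !== 0 && response.dbDim !== response.appDim) {
    onMismatch(response);
    return; // early return replaces the old fall-through save
  }
  onConnected();
}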
From 77b06db1458345ff57a7e2aba2fc2ad04b805420 Mon Sep 17 00:00:00 2001
From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
Date: Fri, 30 Aug 2024 13:49:00 +0000
Subject: [PATCH 035/292] fixed the vector index loading issue
---
frontend/src/components/Content.tsx | 2 +-
.../ConnectionModal/ConnectionModal.tsx | 20 +++++--------------
.../VectorIndexMisMatchAlert.tsx | 11 +++++++---
3 files changed, 14 insertions(+), 19 deletions(-)
diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx
index 039c04106..2dd6e1154 100644
--- a/frontend/src/components/Content.tsx
+++ b/frontend/src/components/Content.tsx
@@ -859,4 +859,4 @@ const Content: React.FC = ({
);
};
-export default Content;
\ No newline at end of file
+export default Content;
diff --git a/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx b/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx
index e11509adf..23d9a1481 100644
--- a/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx
+++ b/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx
@@ -104,7 +104,6 @@ export default function ConnectionModal({
type: 'danger',
content: (
              <VectorIndexMisMatchAlert
-                vectorIndexLoading={vectorIndexLoading}
                 recreateVectorIndex={() => recreateVectorIndex(chunksExistsWithDifferentEmbedding)}
isVectorIndexAlreadyExists={chunksExistsWithDifferentEmbedding || isVectorIndexMatch}
userVectorIndexDimension={JSON.parse(localStorage.getItem('neo4j.connection') ?? 'null').userDbVectorIndex}
@@ -113,7 +112,7 @@ export default function ConnectionModal({
),
});
}
- }, [isVectorIndexMatch, vectorIndexLoading, chunksExistsWithDifferentEmbedding, chunksExistsWithoutEmbedding]);
+ }, [isVectorIndexMatch, chunksExistsWithDifferentEmbedding, chunksExistsWithoutEmbedding]);
const parseAndSetURI = (uri: string, urlparams = false) => {
const uriParts: string[] = uri.split('://');
@@ -224,15 +223,7 @@ export default function ConnectionModal({
type: 'danger',
content: (
              <VectorIndexMisMatchAlert
-                vectorIndexLoading={vectorIndexLoading}
-                recreateVectorIndex={() =>
-                  recreateVectorIndex(
- !(
- response.data.data.db_vector_dimension > 0 &&
- response.data.data.db_vector_dimension != response.data.data.application_dimension
- )
- )
- }
+ recreateVectorIndex={() => recreateVectorIndex(false)}
isVectorIndexAlreadyExists={response.data.data.db_vector_dimension != 0}
chunksExists={true}
/>
@@ -244,7 +235,6 @@ export default function ConnectionModal({
type: 'danger',
content: (
              <VectorIndexMisMatchAlert
-                vectorIndexLoading={vectorIndexLoading}
                 recreateVectorIndex={() => recreateVectorIndex(true)}
isVectorIndexAlreadyExists={
response.data.data.db_vector_dimension != 0 &&
@@ -268,9 +258,9 @@ export default function ConnectionModal({
}
}
setTimeout(() => {
- if(connectionMessage?.type!="danger"){
- setMessage({ type: 'unknown', content: '' })
- }
+ if (connectionMessage?.type != 'danger') {
+ setMessage({ type: 'unknown', content: '' });
+ }
setPassword('');
}, 3000);
};
diff --git a/frontend/src/components/Popups/ConnectionModal/VectorIndexMisMatchAlert.tsx b/frontend/src/components/Popups/ConnectionModal/VectorIndexMisMatchAlert.tsx
index f9e85b63e..3c2965f44 100644
--- a/frontend/src/components/Popups/ConnectionModal/VectorIndexMisMatchAlert.tsx
+++ b/frontend/src/components/Popups/ConnectionModal/VectorIndexMisMatchAlert.tsx
@@ -2,21 +2,22 @@ import { Box, Flex } from '@neo4j-ndl/react';
import Markdown from 'react-markdown';
import ButtonWithToolTip from '../../UI/ButtonWithToolTip';
import { useCredentials } from '../../../context/UserCredentials';
+import { useState } from 'react';
export default function VectorIndexMisMatchAlert({
- vectorIndexLoading,
recreateVectorIndex,
isVectorIndexAlreadyExists,
userVectorIndexDimension,
chunksExists,
}: {
- vectorIndexLoading: boolean;
recreateVectorIndex: () => Promise<void>;
isVectorIndexAlreadyExists: boolean;
userVectorIndexDimension?: number;
chunksExists: boolean;
}) {
const { userCredentials } = useCredentials();
+ const [vectorIndexLoading, setVectorIndexLoading] = useState(false);
+
return (
@@ -42,7 +43,11 @@ To proceed, please choose one of the following options:
label='creates the supported vector index'
placement='top'
loading={vectorIndexLoading}
- onClick={() => recreateVectorIndex()}
+ onClick={async () => {
+ setVectorIndexLoading(true);
+ await recreateVectorIndex();
+ setVectorIndexLoading(false);
+ }}
className='!w-full'
color='danger'
disabled={userCredentials === null}
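Patch 035 moves the busy flag out of ConnectionModal's props and into local state inside VectorIndexMisMatchAlert: the button's onClick becomes an async wrapper that raises the flag, awaits the recreate call, and lowers it again. A stripped-down React/TypeScript sketch of that pattern; the component and prop names here are illustrative, not the file's exact markup:

// Sketch: the component owns its own loading state instead of receiving it as a prop.
import { useState } from 'react';

export function RecreateIndexButton({ recreate }: { recreate: () => Promise<void> }) {
  const [busy, setBusy] = useState(false);
  return (
    <button
      disabled={busy}
      onClick={async () => {
        setBusy(true); // flip on before the awaited call
        await recreate(); // parent-supplied async action
        setBusy(false); // flip off once it settles
      }}
    >
      Recreate vector index
    </button>
  );
}

Note that, as written in the patch, the flag is only reset after a successful await; wrapping the call in try/finally would keep the button from staying disabled if recreate() throws.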
From d1662909c6dac6194420b84c440986488648632c Mon Sep 17 00:00:00 2001
From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
Date: Fri, 30 Aug 2024 14:07:51 +0000
Subject: [PATCH 036/292] fix: empty credentials payload for recreate vector
index api
---
.../ConnectionModal/ConnectionModal.tsx | 81 ++++++++++---------
1 file changed, 43 insertions(+), 38 deletions(-)
diff --git a/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx b/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx
index 23d9a1481..ab9fe2399 100644
--- a/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx
+++ b/frontend/src/components/Popups/ConnectionModal/ConnectionModal.tsx
@@ -60,41 +60,43 @@ export default function ConnectionModal({
}, [open]);
const recreateVectorIndex = useCallback(
- async (isNewVectorIndex: boolean) => {
- try {
- setVectorIndexLoading(true);
- const response = await createVectorIndex(userCredentials as UserCredentials, isNewVectorIndex);
- setVectorIndexLoading(false);
- if (response.data.status === 'Failed') {
- throw new Error(response.data.error);
- } else {
- setMessage({
- type: 'success',
- content: 'Successfully created the vector index',
- });
- setConnectionStatus(true);
- localStorage.setItem(
- 'neo4j.connection',
- JSON.stringify({
- uri: userCredentials?.uri,
- user: userCredentials?.userName,
- password: userCredentials?.password,
- database: userCredentials?.database,
- userDbVectorIndex: 384,
- })
- );
- }
- } catch (error) {
- setVectorIndexLoading(false);
- if (error instanceof Error) {
- console.log('Error in recreating the vector index', error.message);
- setMessage({ type: 'danger', content: error.message });
+ async (isNewVectorIndex: boolean, usercredential: UserCredentials) => {
+ if (usercredential != null && Object.values(usercredential).length) {
+ try {
+ setVectorIndexLoading(true);
+ const response = await createVectorIndex(usercredential as UserCredentials, isNewVectorIndex);
+ setVectorIndexLoading(false);
+ if (response.data.status === 'Failed') {
+ throw new Error(response.data.error);
+ } else {
+ setMessage({
+ type: 'success',
+ content: 'Successfully created the vector index',
+ });
+ setConnectionStatus(true);
+ localStorage.setItem(
+ 'neo4j.connection',
+ JSON.stringify({
+ uri: usercredential?.uri,
+ user: usercredential?.userName,
+ password: usercredential?.password,
+ database: usercredential?.database,
+ userDbVectorIndex: 384,
+ })
+ );
+ }
+ } catch (error) {
+ setVectorIndexLoading(false);
+ if (error instanceof Error) {
+ console.log('Error in recreating the vector index', error.message);
+ setMessage({ type: 'danger', content: error.message });
+ }
}
+ setTimeout(() => {
+ setMessage({ type: 'unknown', content: '' });
+ setOpenConnection((prev) => ({ ...prev, openPopUp: false }));
+ }, 3000);
}
- setTimeout(() => {
- setMessage({ type: 'unknown', content: '' });
- setOpenConnection((prev) => ({ ...prev, openPopUp: false }));
- }, 3000);
},
[userCredentials, userDbVectorIndex]
);
@@ -104,7 +106,9 @@ export default function ConnectionModal({
type: 'danger',
content: (
              <VectorIndexMisMatchAlert
-              recreateVectorIndex={() => recreateVectorIndex(chunksExistsWithDifferentEmbedding)}
+ recreateVectorIndex={() =>
+ recreateVectorIndex(chunksExistsWithDifferentEmbedding, userCredentials as UserCredentials)
+ }
isVectorIndexAlreadyExists={chunksExistsWithDifferentEmbedding || isVectorIndexMatch}
userVectorIndexDimension={JSON.parse(localStorage.getItem('neo4j.connection') ?? 'null').userDbVectorIndex}
chunksExists={chunksExistsWithoutEmbedding}
@@ -112,7 +116,7 @@ export default function ConnectionModal({
),
});
}
- }, [isVectorIndexMatch, chunksExistsWithDifferentEmbedding, chunksExistsWithoutEmbedding]);
+ }, [isVectorIndexMatch, chunksExistsWithDifferentEmbedding, chunksExistsWithoutEmbedding, userCredentials]);
const parseAndSetURI = (uri: string, urlparams = false) => {
const uriParts: string[] = uri.split('://');
@@ -188,7 +192,8 @@ export default function ConnectionModal({
const submitConnection = async () => {
const connectionURI = `${protocol}://${URI}${URI.split(':')[1] ? '' : `:${port}`}`;
- setUserCredentials({ uri: connectionURI, userName: username, password: password, database: database, port: port });
+ const credential = { uri: connectionURI, userName: username, password: password, database: database, port: port };
+ setUserCredentials(credential);
setIsLoading(true);
try {
const response = await connectAPI(connectionURI, username, password, database);
@@ -223,7 +228,7 @@ export default function ConnectionModal({
type: 'danger',
content: (
              <VectorIndexMisMatchAlert
-            recreateVectorIndex={() => recreateVectorIndex(false)}
+ recreateVectorIndex={() => recreateVectorIndex(false, credential)}
isVectorIndexAlreadyExists={response.data.data.db_vector_dimension != 0}
chunksExists={true}
/>
@@ -235,7 +240,7 @@ export default function ConnectionModal({
type: 'danger',
content: (
              <VectorIndexMisMatchAlert
-            recreateVectorIndex={() => recreateVectorIndex(true)}
+ recreateVectorIndex={() => recreateVectorIndex(true, credential)}
isVectorIndexAlreadyExists={
response.data.data.db_vector_dimension != 0 &&
response.data.data.db_vector_dimension != response.data.data.application_dimension
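The root cause in patch 036 is that recreateVectorIndex read userCredentials from context state, but setUserCredentials(credential) in submitConnection has not committed by the time the alert's button can fire, so the API could receive an empty payload. The fix threads the freshly built credential object through as an explicit argument and guards against an empty object. A generic TypeScript sketch of the pitfall and the fix; makeRecreate and the api callback are hypothetical names:

// Sketch: don't let a callback close over state that was set "just now";
// pass the value explicitly from the call site that owns it.
type Creds = { uri: string; userName: string; password: string; database: string };

function makeRecreate(api: (c: Creds, isNew: boolean) => Promise<void>) {
  return async (isNewVectorIndex: boolean, creds: Creds): Promise<void> => {
    // Mirrors the patch's guard: skip the call rather than send an empty payload.
    if (creds != null && Object.values(creds).length) {
      await api(creds, isNewVectorIndex);
    }
  };
}

const recreate = makeRecreate(async (c, isNew) => {
  console.log(`createVectorIndex for ${c.uri}, isNewVectorIndex=${isNew}`);
});

Passing the object built in submitConnection, rather than the not-yet-committed state, is the part that actually fixes the empty payload.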
From 088eda21f5f0ee553ddc955fad24f3c113adbfa9 Mon Sep 17 00:00:00 2001
From: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com>
Date: Mon, 2 Sep 2024 16:51:26 +0530
Subject: [PATCH 037/292] chatbot status (#676)
* chatbot status
* connection status check for ASK button
* refresh disable check
* review comment resolved
* format fixes
---
frontend/src/components/ChatBot/Chatbot.tsx | 15 +++++++++++----
frontend/src/components/Content.tsx | 3 +--
frontend/src/components/Graph/GraphViewModal.tsx | 4 ++++
frontend/src/components/Layout/DrawerChatbot.tsx | 3 ++-
frontend/src/components/Layout/PageLayout.tsx | 9 +++++++--
frontend/src/components/Layout/SideNav.tsx | 3 +++
.../Deduplication/index.tsx | 4 ++--
frontend/src/context/UserCredentials.tsx | 7 ++++++-
frontend/src/types.ts | 4 ++++
9 files changed, 40 insertions(+), 12 deletions(-)
diff --git a/frontend/src/components/ChatBot/Chatbot.tsx b/frontend/src/components/ChatBot/Chatbot.tsx
index da5aaaf29..448af0e7d 100644
--- a/frontend/src/components/ChatBot/Chatbot.tsx
+++ b/frontend/src/components/ChatBot/Chatbot.tsx
@@ -22,7 +22,14 @@ import FallBackDialog from '../UI/FallBackDialog';
const InfoModal = lazy(() => import('./ChatInfoModal'));
const Chatbot: FC<ChatbotProps> = (props) => {
- const { messages: listMessages, setMessages: setListMessages, isLoading, isFullScreen, clear } = props;
+ const {
+ messages: listMessages,
+ setMessages: setListMessages,
+ isLoading,
+ isFullScreen,
+ clear,
+ connectionStatus,
+ } = props;
const [inputMessage, setInputMessage] = useState('');
const [loading, setLoading] = useState(isLoading);
const { userCredentials } = useCredentials();
@@ -289,7 +296,7 @@ const Chatbot: FC = (props) => {
shape='square'
size='x-large'
source={ChatBotAvatar}
- status='online'
+ status={connectionStatus ? 'online' : 'offline'}
type='image'
/>
) : (
@@ -299,7 +306,7 @@ const Chatbot: FC = (props) => {
name='KM'
shape='square'
size='x-large'
- status='online'
+ status={connectionStatus ? 'online' : 'offline'}
type='image'
/>
)}
@@ -415,7 +422,7 @@ const Chatbot: FC = (props) => {
placement='top'
text={`Query Documents in ${chatMode} mode`}
type='submit'
- disabled={loading}
+ disabled={loading || !connectionStatus}
size='medium'
>
{buttonCaptions.ask} {selectedRows != undefined && selectedRows.length > 0 && `(${selectedRows.length})`}
diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx
index 2dd6e1154..44e7fd325 100644
--- a/frontend/src/components/Content.tsx
+++ b/frontend/src/components/Content.tsx
@@ -57,8 +57,7 @@ const Content: React.FC = ({
});
const [openGraphView, setOpenGraphView] = useState(false);
const [inspectedName, setInspectedName] = useState('');
- const [connectionStatus, setConnectionStatus] = useState(false);
- const { setUserCredentials, userCredentials } = useCredentials();
+ const { setUserCredentials, userCredentials, connectionStatus, setConnectionStatus } = useCredentials();
const [showConfirmationModal, setshowConfirmationModal] = useState(false);
const [extractLoading, setextractLoading] = useState(false);
diff --git a/frontend/src/components/Graph/GraphViewModal.tsx b/frontend/src/components/Graph/GraphViewModal.tsx
index 39438b788..4125c0d12 100644
--- a/frontend/src/components/Graph/GraphViewModal.tsx
+++ b/frontend/src/components/Graph/GraphViewModal.tsx
@@ -69,6 +69,7 @@ const GraphViewModal: React.FunctionComponent = ({
const [newScheme, setNewScheme] = useState({});
const [searchQuery, setSearchQuery] = useState('');
const debouncedQuery = useDebounce(searchQuery, 300);
+ const [disableRefresh, setDisableRefresh] = useState(false);
// the checkbox selection
const handleCheckboxChange = (graph: GraphType) => {
@@ -165,6 +166,7 @@ const GraphViewModal: React.FunctionComponent = ({
setAllNodes(finalNodes);
setAllRelationships(finalRels);
setScheme(schemeVal);
+ setDisableRefresh(false);
} else {
setLoading(false);
setStatus('danger');
@@ -292,6 +294,7 @@ const GraphViewModal: React.FunctionComponent = ({
// Refresh the graph with nodes and relations if file is processing
const handleRefresh = () => {
+ setDisableRefresh(true);
graphApi('refreshMode');
setGraphType(graphType);
setNodes(nodes);
@@ -455,6 +458,7 @@ const GraphViewModal: React.FunctionComponent = ({
text='Refresh graph'
onClick={handleRefresh}
placement='left'
+ disabled={disableRefresh}
>
diff --git a/frontend/src/components/Layout/DrawerChatbot.tsx b/frontend/src/components/Layout/DrawerChatbot.tsx
index d08b2d8d1..3150e96a5 100644
--- a/frontend/src/components/Layout/DrawerChatbot.tsx
+++ b/frontend/src/components/Layout/DrawerChatbot.tsx
@@ -3,7 +3,7 @@ import Chatbot from '../ChatBot/Chatbot';
import { DrawerChatbotProps, Messages } from '../../types';
import { useMessageContext } from '../../context/UserMessages';
-const DrawerChatbot: React.FC<DrawerChatbotProps> = ({ isExpanded, clearHistoryData, messages }) => {
+const DrawerChatbot: React.FC<DrawerChatbotProps> = ({ isExpanded, clearHistoryData, messages, connectionStatus }) => {
const { setMessages } = useMessageContext();
const getIsLoading = (messages: Messages[]) => {
@@ -19,6 +19,7 @@ const DrawerChatbot: React.FC = ({ isExpanded, clearHistoryD
setMessages={setMessages}
clear={clearHistoryData}
isLoading={getIsLoading(messages)}
+ connectionStatus={connectionStatus}
/>
diff --git a/frontend/src/components/Layout/PageLayout.tsx b/frontend/src/components/Layout/PageLayout.tsx
index 8aca58109..8361ad3cd 100644
--- a/frontend/src/components/Layout/PageLayout.tsx
+++ b/frontend/src/components/Layout/PageLayout.tsx
@@ -33,7 +33,7 @@ export default function PageLayoutNew({
const [shows3Modal, toggleS3Modal] = useReducer((s) => !s, false);
const [showGCSModal, toggleGCSModal] = useReducer((s) => !s, false);
const [showGenericModal, toggleGenericModal] = useReducer((s) => !s, false);
- const { userCredentials } = useCredentials();
+ const { userCredentials, connectionStatus } = useCredentials();
const toggleLeftDrawer = () => {
if (largedesktops) {
setIsLeftExpanded(!isLeftExpanded);
@@ -156,7 +156,12 @@ export default function PageLayoutNew({
closeSettingModal={closeSettingModal}
/>
{showDrawerChatbot && (
-          <DrawerChatbot isExpanded={isRightExpanded} clearHistoryData={clearHistoryData} messages={messages} />
+          <DrawerChatbot
+            isExpanded={isRightExpanded}
+            clearHistoryData={clearHistoryData}
+            messages={messages}
+            connectionStatus={connectionStatus}
+          />
)}
= ({
position,
@@ -43,6 +44,7 @@ const SideNav: React.FC = ({
const [chatModeAnchor, setchatModeAnchor] = useState(null);
const [showChatMode, setshowChatMode] = useState(false);
const largedesktops = useMediaQuery(`(min-width:1440px )`);
+ const { connectionStatus } = useCredentials();
const date = new Date();
useEffect(() => {
@@ -249,6 +251,7 @@ const SideNav: React.FC = ({
messages={messages ?? []}
setMessages={setMessages}
isLoading={getIsLoading(messages ?? [])}
+ connectionStatus={connectionStatus}
/>
,
diff --git a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx
index f5a021e30..36fcff3d1 100644
--- a/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx
+++ b/frontend/src/components/Popups/GraphEnhancementDialog/Deduplication/index.tsx
@@ -80,12 +80,12 @@ export default function DeduplicationTab() {
const onRemove = (nodeid: string, similarNodeId: string) => {
setDuplicateNodes((prev) => {
return prev.map((d) =>
- (d.e.elementId === nodeid
+ d.e.elementId === nodeid
? {
...d,
similar: d.similar.filter((n) => n.elementId != similarNodeId),
}
- : d)
+ : d
);
});
};
diff --git a/frontend/src/context/UserCredentials.tsx b/frontend/src/context/UserCredentials.tsx
index 20ed75ae5..af13ea371 100644
--- a/frontend/src/context/UserCredentials.tsx
+++ b/frontend/src/context/UserCredentials.tsx
@@ -1,4 +1,4 @@
-import { createContext, useState, useContext, FunctionComponent, ReactNode } from 'react';
+import { createContext, useState, useContext, FunctionComponent, ReactNode, useReducer } from 'react';
import { ContextProps, UserCredentials } from '../types';
type Props = {
@@ -8,6 +8,8 @@ type Props = {
export const UserConnection = createContext<ContextProps>({
userCredentials: null,
setUserCredentials: () => null,
+ connectionStatus: false,
+ setConnectionStatus: () => null,
});
export const useCredentials = () => {
const userCredentials = useContext(UserConnection);
@@ -15,9 +17,12 @@ export const useCredentials = () => {
};
const UserCredentialsWrapper: FunctionComponent<Props> = (props) => {
   const [userCredentials, setUserCredentials] = useState<UserCredentials | null>(null);
+ const [connectionStatus, setConnectionStatus] = useReducer((state) => !state, false);
const value = {
userCredentials,
setUserCredentials,
+ connectionStatus,
+ setConnectionStatus,
};
return <UserConnection.Provider value={value}>{props.children}</UserConnection.Provider>;
};
diff --git a/frontend/src/types.ts b/frontend/src/types.ts
index 5bf076d10..2802a2d6b 100644
--- a/frontend/src/types.ts
+++ b/frontend/src/types.ts
@@ -247,6 +247,7 @@ export type ChatbotProps = {
isLoading: boolean;
clear?: boolean;
isFullScreen?: boolean;
+ connectionStatus: boolean;
};
export interface WikipediaModalTypes {
hideModal: () => void;
@@ -638,11 +639,14 @@ export interface DrawerChatbotProps {
isExpanded: boolean;
clearHistoryData: boolean;
messages: Messages[];
+ connectionStatus: boolean;
}
export interface ContextProps {
userCredentials: UserCredentials | null;
setUserCredentials: (UserCredentials: UserCredentials) => void;
+ connectionStatus: boolean;
+  setConnectionStatus: Dispatch<SetStateAction<boolean>>;
}
export interface MessageContextType {
messages: Messages[] | [];
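Patch 037 lifts connectionStatus into the credentials context so Content, PageLayout, SideNav and the chatbot all read a single flag, and it keeps that flag with useReducer((state) => !state, false), i.e. a toggle rather than a setter. A small self-contained sketch of that context shape; ConnectionProvider and useConnection are illustrative names:

// Sketch of a context-level boolean kept with a toggling reducer, as in the patch.
import { createContext, useContext, useReducer, ReactNode } from 'react';

const ConnectionContext = createContext<{ connected: boolean; toggle: () => void }>({
  connected: false,
  toggle: () => undefined,
});

export function ConnectionProvider({ children }: { children: ReactNode }) {
  // Every dispatch flips the flag; the dispatcher takes no argument.
  const [connected, toggle] = useReducer((state: boolean) => !state, false);
  return <ConnectionContext.Provider value={{ connected, toggle }}>{children}</ConnectionContext.Provider>;
}

export const useConnection = () => useContext(ConnectionContext);

One consequence worth noting: a toggle-style reducer can only flip the current value, so a dispatcher named setConnectionStatus(true) does not force the flag to true, it inverts whatever is there; a plain useState<boolean> setter would give set-to-value semantics.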
From dbfe2a73752f951f2f7a5a570035d9b3e071cf3c Mon Sep 17 00:00:00 2001
From: vasanthasaikalluri
<165021735+vasanthasaikalluri@users.noreply.github.com>
Date: Mon, 2 Sep 2024 17:19:37 +0000
Subject: [PATCH 038/292] added properties and modified to entity labels
---
backend/src/communities.py | 243 ++++++++++++++++++++++++--------
backend/src/shared/common_fn.py | 4 +-
2 files changed, 190 insertions(+), 57 deletions(-)
diff --git a/backend/src/communities.py b/backend/src/communities.py
index dbcb73ebc..1c670fff1 100644
--- a/backend/src/communities.py
+++ b/backend/src/communities.py
@@ -3,19 +3,19 @@
from src.llm import get_llm
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
-from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor, as_completed
-from tqdm import tqdm
+import os
+from src.shared.common_fn import load_embedding_model
COMMUNITY_PROJECTION_NAME = "communities"
NODE_PROJECTION = "!Chunk&!Document&!__Community__"
+NODE_PROJECTION_ENTITY = "__Entity__"
MAX_WORKERS = 10
CREATE_COMMUNITY_GRAPH_PROJECTION = """
-MATCH (source:{node_projection})
-OPTIONAL MATCH (source)-[]->(target:{node_projection})
+MATCH (source:{node_projection})-[]->(target:{node_projection})
WITH source, target, count(*) as weight
WITH gds.graph.project(
'{project_name}',
@@ -32,7 +32,7 @@
CREATE_COMMUNITY_CONSTRAINT = "CREATE CONSTRAINT IF NOT EXISTS FOR (c:__Community__) REQUIRE c.id IS UNIQUE;"
CREATE_COMMUNITY_LEVELS = """
-MATCH (e:!Chunk&!Document&!__Community__)
+MATCH (e:`__Entity__`)
WHERE e.communities is NOT NULL
UNWIND range(0, size(e.communities) - 1 , 1) AS index
CALL {
@@ -52,7 +52,7 @@
ON CREATE SET current.level = index
MERGE (previous:`__Community__` {id: toString(index - 1) + '-' + toString(e.communities[index - 1])})
ON CREATE SET previous.level = index - 1
- MERGE (previous)-[:IN_COMMUNITY]->(current)
+ MERGE (previous)-[:PARENT_COMMUNITY]->(current)
RETURN count(*) AS count_1
}
RETURN count(*)
@@ -63,13 +63,26 @@
SET c.community_rank = rank;
"""
+CREATE_PARENT_COMMUNITY_RANKS = """
+MATCH (c:__Community__)<-[:PARENT_COMMUNITY*]-(:__Community__)<-[:IN_COMMUNITY*]-(:!Chunk&!Document&!__Community__)<-[:HAS_ENTITY]-(:Chunk)<-[]-(d:Document)
+WITH c, count(distinct d) AS rank
+SET c.community_rank = rank;
+"""
+
CREATE_COMMUNITY_WEIGHTS = """
MATCH (n:`__Community__`)<-[:IN_COMMUNITY]-()<-[:HAS_ENTITY]-(c)
WITH n, count(distinct c) AS chunkCount
-SET n.weight = chunkCount"""
+SET n.weight = chunkCount
+"""
+CREATE_PARENT_COMMUNITY_WEIGHTS = """
+MATCH (n:`__Community__`)<-[:PARENT_COMMUNITY*]-(:`__Community__`)<-[:IN_COMMUNITY]-()<-[:HAS_ENTITY]-(c)
+WITH n, count(distinct c) AS chunkCount
+SET n.weight = chunkCount
+"""
GET_COMMUNITY_INFO = """
MATCH (c:`__Community__`)<-[:IN_COMMUNITY]-(e)
+WHERE c.level = 0
WITH c, collect(e) AS nodes
WHERE size(nodes) > 1
CALL apoc.path.subgraphAll(nodes[0], {
@@ -81,17 +94,64 @@
[r in relationships | {start: startNode(r).id, type: type(r), end: endNode(r).id}] AS rels
"""
+GET_PARENT_COMMUNITY_INFO = """
+MATCH (p:`__Community__`)<-[:PARENT_COMMUNITY*]-(c:`__Community__`)
+WHERE p.summary is null and c.summary is not null
+RETURN p.id as communityId, collect(c.summary) as texts
+"""
+
+
STORE_COMMUNITY_SUMMARIES = """
UNWIND $data AS row
MERGE (c:__Community__ {id:row.community})
SET c.summary = row.summary
"""
+COMMUNITY_SYSTEM_TEMPLATE = "Given input triples, generate the information summary. No preamble."
+
COMMUNITY_TEMPLATE = """Based on the provided nodes and relationships that belong to the same graph community,
generate a natural language summary of the provided information:
{community_info}
-Summary:"""
+Summary:"""
+
+PARENT_COMMUNITY_SYSTEM_TEMPLATE = "Given an input list of community summaries, generate a summary of the information"
+
+PARENT_COMMUNITY_TEMPLATE = """Based on the provided list of community summaries that belong to the same graph community,
+generate a natural language summary of the information. Include as much of the necessary information as possible.
+{community_info}
+
+Summary:"""
+
+
+GET_COMMUNITY_DETAILS = """
+MATCH (c:`__Community__`)
+WHERE c.embedding IS NULL AND c.summary IS NOT NULL
+RETURN c.id as communityId, c.summary as text
+"""
+
+WRITE_COMMUNITY_EMBEDDINGS = """
+UNWIND $rows AS row
+MATCH (c) WHERE c.id = row.communityId
+CALL db.create.setNodeVectorProperty(c, "embedding", row.embedding)
+"""
+
+DROP_COMMUNITIES = "MATCH (c:`__Community__`) DETACH DELETE c"
+DROP_COMMUNITY_PROPERTY = "MATCH (e:`__Entity__`) REMOVE e.communities"
+
+
+ENTITY_VECTOR_INDEX_NAME = "entity_vector"
+ENTITY_VECTOR_EMBEDDING_DIMENSION = 384
+
+CREATE_ENTITY_VECTOR_INDEX = """
+CREATE VECTOR INDEX {index_name} IF NOT EXISTS FOR (e:__Entity__) ON e.embedding
+OPTIONS {{
+ indexConfig: {{
+ `vector.dimensions`: {embedding_dimension},
+ `vector.similarity_function`: 'cosine'
+ }}
+}}
+"""
def get_gds_driver(uri, username, password, database):
@@ -117,22 +177,6 @@ def create_community_graph_projection(gds, project_name=COMMUNITY_PROJECTION_NAM
gds.graph.drop(project_name)
logging.info(f"Creating new graph project '{project_name}'.")
- # graph_project, result = gds.graph.project(
- # project_name,
- # node_projection,
- # {
- # "_ALL_": {
- # "type": "*",
- # "orientation": "UNDIRECTED",
- # "properties": {
- # "weight": {
- # "property": "*",
- # "aggregation": "COUNT"
- # }
- # }
- # }
- # }
- # )
projection_query = CREATE_COMMUNITY_GRAPH_PROJECTION.format(node_projection=node_projection,project_name=project_name)
graph_projection_result = gds.run_cypher(projection_query)
projection_result = graph_projection_result.to_dict(orient="records")[0]
@@ -159,14 +203,17 @@ def write_communities(gds, graph_project, project_name=COMMUNITY_PROJECTION_NAME
return False
-def get_community_chain(model, community_template=COMMUNITY_TEMPLATE):
+def get_community_chain(model, is_parent=False,community_template=COMMUNITY_TEMPLATE,system_template=COMMUNITY_SYSTEM_TEMPLATE):
try:
+ if is_parent:
+ community_template=PARENT_COMMUNITY_TEMPLATE
+ system_template= PARENT_COMMUNITY_SYSTEM_TEMPLATE
llm, model_name = get_llm(model)
community_prompt = ChatPromptTemplate.from_messages(
[
(
"system",
- "Given input triples, generate the information summary. No pre-amble.",
+ system_template,
),
("human", community_template),
]
@@ -200,68 +247,154 @@ def prepare_string(community_data):
logging.error(f"Failed to prepare string from community data: {e}")
raise
-def process_community(community, community_chain):
+def process_community_info(community, chain, is_parent=False):
try:
- formatted_community_info = prepare_string(community)
- summary = community_chain.invoke({'community_info': formatted_community_info})
+ if is_parent:
+ combined_text = " ".join(f"Summary {i+1}: {summary}" for i, summary in enumerate(community.get("texts", [])))
+ else:
+ combined_text = prepare_string(community)
+ summary = chain.invoke({'community_info': combined_text})
return {"community": community['communityId'], "summary": summary}
except Exception as e:
logging.error(f"Failed to process community {community.get('communityId', 'unknown')}: {e}")
- raise
+ return None
def create_community_summaries(gds, model):
try:
community_info_list = gds.run_cypher(GET_COMMUNITY_INFO)
community_chain = get_community_chain(model)
-
+
summaries = []
- futures = []
with ThreadPoolExecutor() as executor:
- for _,community in community_info_list.iterrows():
- future = executor.submit(process_community, community, community_chain)
- futures.append(future)
-
+            futures = [executor.submit(process_community_info, community, community_chain) for _, community in community_info_list.iterrows()]
+
for future in as_completed(futures):
- try:
- summaries.append(future.result())
- except Exception as e:
- logging.error(f"Failed to retrieve result for a community: {e}")
+ result = future.result()
+ if result:
+ summaries.append(result)
+ else:
+ logging.error("community summaries could not be processed.")
gds.run_cypher(STORE_COMMUNITY_SUMMARIES, params={"data": summaries})
+
+ parent_community_info = gds.run_cypher(GET_PARENT_COMMUNITY_INFO)
+ parent_community_chain = get_community_chain(model, is_parent=True)
+
+ parent_summaries = []
+ with ThreadPoolExecutor() as executor:
+            futures = [executor.submit(process_community_info, community, parent_community_chain, is_parent=True) for _, community in parent_community_info.iterrows()]
+
+ for future in as_completed(futures):
+ result = future.result()
+ if result:
+ parent_summaries.append(result)
+ else:
+ logging.error("parent community summaries could not be processed.")
+
+ gds.run_cypher(STORE_COMMUNITY_SUMMARIES, params={"data": parent_summaries})
+
except Exception as e:
logging.error(f"Failed to create community summaries: {e}")
raise
+def create_community_embeddings(gds):
+ try:
+ embedding_model = os.getenv('EMBEDDING_MODEL')
+ embeddings, dimension = load_embedding_model(embedding_model)
+ logging.info(f"Embedding model '{embedding_model}' loaded successfully.")
+
+ logging.info("Fetching community details.")
+ rows = gds.run_cypher(GET_COMMUNITY_DETAILS)
+ rows = rows[['communityId', 'text']].to_dict(orient='records')
+ logging.info(f"Fetched {len(rows)} communities.")
+
+ batch_size = 100
+ for i in range(0, len(rows), batch_size):
+ batch_rows = rows[i:i+batch_size]
+ for row in batch_rows:
+ try:
+ row['embedding'] = embeddings.embed_query(row['text'])
+ except Exception as e:
+ logging.error(f"Failed to embed text for community ID {row['communityId']}: {e}")
+ row['embedding'] = None
+
+ try:
+ logging.info("Writing embeddings to the database.")
+ gds.run_cypher(WRITE_COMMUNITY_EMBEDDINGS, params={'rows': batch_rows})
+ logging.info("Embeddings written successfully.")
+ except Exception as e:
+ logging.error(f"Failed to write embeddings to the database: {e}")
+ continue
+ return dimension
+ except Exception as e:
+ logging.error(f"An error occurred during the community embedding process: {e}")
+def create_entity_vector_index(gds, embedding_dimension=ENTITY_VECTOR_EMBEDDING_DIMENSION):
+ query = CREATE_ENTITY_VECTOR_INDEX.format(
+ index_name=ENTITY_VECTOR_INDEX_NAME,
+ embedding_dimension=embedding_dimension
+ )
+ try:
+ logging.info(f"Running Cypher query to create entity vector index: {query}")
+ gds.run_cypher(query)
+ logging.info("Entity vector index created successfully.")
+ except Exception as e:
+ logging.error(f"Error occurred while creating entity vector index: {e}", exc_info=True)
+
def create_community_properties(gds, model):
+ commands = [
+        (CREATE_COMMUNITY_CONSTRAINT, "Successfully created community constraint."),
+ (CREATE_COMMUNITY_LEVELS, "Successfully created community levels."),
+ (CREATE_COMMUNITY_RANKS, "Successfully created community ranks."),
+ (CREATE_PARENT_COMMUNITY_RANKS, "Successfully created parent community ranks."),
+ (CREATE_COMMUNITY_WEIGHTS, "Successfully created community weights."),
+ (CREATE_PARENT_COMMUNITY_WEIGHTS, "Successfully created parent community weights."),
+ ]
try:
- # Create community levels
- gds.run_cypher(CREATE_COMMUNITY_LEVELS)
- logging.info("Successfully created community levels.")
+ for command, message in commands:
+ gds.run_cypher(command)
+ logging.info(message)
- # Create community ranks
- gds.run_cypher(CREATE_COMMUNITY_RANKS)
- logging.info("Successfully created community ranks.")
-
- # Create community weights
- gds.run_cypher(CREATE_COMMUNITY_WEIGHTS)
- logging.info("Successfully created community weights.")
-
- # Create community summaries
create_community_summaries(gds, model)
logging.info("Successfully created community summaries.")
+
+ embedding_dimension = create_community_embeddings(gds)
+ logging.info("Successfully created community embeddings.")
+
+ create_entity_vector_index(gds,embedding_dimension=embedding_dimension)
+ logging.info("Successfully created Entity Vector Index.")
+
except Exception as e:
- logging.error(f"Failed to create community properties: {e}")
+ logging.error(f"Error during community properties creation: {e}")
raise
+
+def clear_communities(gds):
+ try:
+ logging.info("Starting to clear communities.")
+
+ logging.info("Dropping communities...")
+ gds.run_cypher(DROP_COMMUNITIES)
+ logging.info(f"Communities dropped successfully")
+
+ logging.info("Dropping community property from entities...")
+ gds.run_cypher(DROP_COMMUNITY_PROPERTY)
+ logging.info(f"Community property dropped successfully")
+
+ except Exception as e:
+ logging.error(f"An error occurred while clearing communities: {e}")
+ raise
+
+
def create_communities(uri, username, password, database,model):
try:
gds = get_gds_driver(uri, username, password, database)
+ clear_communities(gds)
+
graph_project = create_community_graph_projection(gds)
write_communities_sucess = write_communities(gds, graph_project)
if write_communities_sucess:
- logging.info("Applying community constraint to the graph.")
- gds.run_cypher(CREATE_COMMUNITY_CONSTRAINT)
+ logging.info("Starting Community properties creation process.")
create_community_properties(gds,model)
logging.info("Communities creation process completed successfully.")
else:
diff --git a/backend/src/shared/common_fn.py b/backend/src/shared/common_fn.py
index 2037682e7..6d24912c7 100644
--- a/backend/src/shared/common_fn.py
+++ b/backend/src/shared/common_fn.py
@@ -94,8 +94,8 @@ def load_embedding_model(embedding_model_name: str):
return embeddings, dimension
def save_graphDocuments_in_neo4j(graph:Neo4jGraph, graph_document_list:List[GraphDocument]):
- # graph.add_graph_documents(graph_document_list, baseEntityLabel=True,include_source=True)
- graph.add_graph_documents(graph_document_list)
+ graph.add_graph_documents(graph_document_list, baseEntityLabel=True,include_source=True)
+ # graph.add_graph_documents(graph_document_list)
def handle_backticks_nodes_relationship_id_type(graph_document_list:List[GraphDocument]):
for graph_document in graph_document_list:
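Patch 038's create_community_embeddings embeds community summaries in batches of 100 and writes each batch with a single UNWIND, tolerating per-row embedding failures by storing a null instead of aborting the run. A TypeScript rendering of that loop, with embed and runCypher as hypothetical stand-ins for the embedding model and the Cypher client:

// Sketch of the batched embed-and-write loop; the UNWIND text matches
// WRITE_COMMUNITY_EMBEDDINGS from the patch, the helpers are assumptions.
type Row = { communityId: string; text: string; embedding?: number[] | null };

async function writeCommunityEmbeddings(
  rows: Row[],
  embed: (text: string) => Promise<number[]>,
  runCypher: (query: string, params: Record<string, unknown>) => Promise<void>
): Promise<void> {
  const batchSize = 100;
  for (let i = 0; i < rows.length; i += batchSize) {
    const batch = rows.slice(i, i + batchSize);
    for (const row of batch) {
      try {
        row.embedding = await embed(row.text);
      } catch {
        row.embedding = null; // one failed row does not abort the batch
      }
    }
    // One round trip per batch instead of one per community.
    await runCypher(
      'UNWIND $rows AS row MATCH (c) WHERE c.id = row.communityId ' +
        'CALL db.create.setNodeVectorProperty(c, "embedding", row.embedding)',
      { rows: batch }
    );
  }
}

The batch size bounds both memory use and transaction size, and the per-row try/catch matches the patch's decision to log and continue rather than fail the whole run when a single summary cannot be embedded.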
From 3d25d78cf68a35bb142074585ff7c36587fb35a9 Mon Sep 17 00:00:00 2001
From: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
Date: Tue, 3 Sep 2024 17:51:29 +0530
Subject: [PATCH 039/292] Post processing call after all files completion
(#716)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
* Dev To STAGING (#532)
* format fixes and graph schema indication fix
* Update README.md
* added chat modes variable in env updated the readme
* spell fix
* added the chat mode in env table
* added the logos
* fixed the overflow issues
* removed the extra fix
* Fixed specific scenario "when the text from schema closes it should reopen the previous modal"
* readme changes
* removed dev console logs
* added new retrieval query (#533)
* format fixes and tab rendering fix
* fixed the setting modal reopen issue
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com>
* Dev (#535)
---------
Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com>
Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com>
* Dev (#537)
---------
Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com>
Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com>
* Fix typo: correct 'josn_obj' to 'json_obj' (#697)
* Staging To Main (#495)
* Integration_qa test (#375)
* Test IntegrationQA added
* update test cases
* update test
* update node count assertions
* test changes
* update changes
* modification test
* Code refatctor test cases
* Handle allowedlist issue in test
* test changes
* update test
* test case execution
* test chatbot updates
* test case update file
* added file
---------
Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com>
* recent merges
* pdf deletion due to out of diskspace
* fixed status blank issue
* Rendering the file name instead of link for gcs and s3 sources in the info modal
* Convert is_cancelled value from string to bool
* added the default page size
* Issue fixed Processed chunked as 0 when file re-process again
* Youtube timestamps (#386)
* Wikipedia source to accept all valid urls
* wikipedia url to support multiple languages
* integrated wiki langauge param for extract api
* Youtube video timestamps
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* groq llm integration backend (#286)
* groq llm integration backend
* groq and description in node properties
* added groq in options
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* offset in chunks (#389)
* page number in gcs loader (#393)
* added youtube timestamps (#392)
* chat pop up button (#387)
* expand
* minimize-icon
* css changes
* chat history
* chatbot wider Side Nav
* expand icon
* chatbot UI
* Delete
* merge fixes
* code suggestions
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* chunks create before extraction using is_pre_process variable (#383)
* chunks create before extraction using is_pre_process variable
* Return total pages for Model
* update requirement.txt
* total pages on uplaod API
* added the Confirmation Dialog
* added the selected files into the confirmation modal
* format and lint fixes
* added the stop watch image
* fileselection on alert dialog
* Add timeout in docker for gunicorn workers
* Add cancel icon to info popup (#384)
* Info Modal Changes
* css changes
* recent merges
* Save Total Pages in DB
* Added total Pages
* file selection when we didn't select anything from Main table
* added the danger icon only for large files
* added the overflow for more files and file selection for all new files
* moved the interface to types
* added the icon accoroding to the source
* set total page for wiki and youtube
* h3 heading
* merge
* updated the alert on basis if total pages
* deleted chunks
* polling based on total pages
* isNan check
* large file based on file size for s3 and gcs
* file source in server side event
* time calculation based on chunks for gcs and s3
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com>
Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com>
Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com>
* fixed the layout issue
* Populate graph schema (#399)
* crreate new endpoint populate_graph_schema and update the query for getting lables from DB
* Added main.py changes
* conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396)
* added the condtion
* removed llms
* Fixed issue : Remove extra unused param
* get emb only if used (#278)
* Chatbot chunks (#402)
* Added file name to the content sent to LLM
* added chunk text in the response
* increased the docs parts sent to llm
* Modified graph query
* mardown rendering
* youtube starttime
* icons
* offset changes
* removed the files due to codespace space issue
---------
Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com>
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* Settings modal to support generating the labels from the llm by using text given by user (#405)
* added the json
* added schema from text dialog
* integrated the schemaAPI
* added the alert
* resize fixes
* fixed css issue
* fixed status blank issue
* Modified response when no docs is retrived (#413)
* Fixed env/docker-compose for local deployments + README doc (#410)
* Fixed env/docker-compose for local deployments + README doc
* wrong place for ENV in README
* by default, removed langsmith + fixed knn score string to float
* by default, removed langsmith + fixed knn score string to float
* Fixed strings in docker-compose env
* Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop)
* Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that
* Support for all unstructured files (#401)
* all unstructured files
* responsiveness
* added file type
* added the extensions
* spell mistake
* ppt file changes
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415)
* added the json
* added schema from text dialog
* integrated the schemaAPI
* added the alert
* resize fixes
* Extract schema using direct ChatOpenAI API and Chain
* integrated the checkbox for schema to text dialog
* Update SettingModal.tsx
---------
Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com>
* gcs file content read via storage client (#417)
* gcs file content read via storage client
* added the access token the file state
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* pypdf2 to read files from gcs (#420)
* 407 remove driver from frontend (#416)
* removed driver
* removed API
* connecting to database on page refresh
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* Css handling of info modal and Tooltips (#418)
* css change
* toolTips
* Sidebar Tooltips
* copy to clip
* css change
* added image types
* added gcs
* type fix
* docker changes
* speech
* added the toolip for dropzone sources
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* Fixed retrival bugs (#421)
* yarn format fixes
* changed the delete message
* added the cancel button
* changed the message on tooltip
* added space
* UI fixes
* tooltip for setting
* updated req
* wikipedia URL input (#424)
* accept only wikipedia links
* added wikipedia link
* added wikilink regex
* wikipedia single url only
* changed the alert message
* wording change
* pushed validation state persist error
---------
Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com>
* speech and copy (#422)
* speech and copy
* startTime
* added chunk properties
* tooltips
---------
Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com>
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* Fixed issue for out of range in KNN API
* solved conflicts
* conflict solved
* Remove logging info from update KNN API
* tooltip changes
* format and lint fixes
* responsiveness changes
* Fixed issue for total pages GCS, S3
* UI polishing (#428)
* button and tooltip changes
* checking validation on change
* settings module populate fix
* format fixes
* opening the modal after auth success
* removed the limit
* added the scrobar for dropdowns
* speech state (#426)
* speech state
* Button Details changes
* delete wording change
* Total pages in buckets (#431)
* page number NA for buckets
* added N/A for gcs and s3 pages
* total pages for gcs
* remove unwanted logger
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* removed the max width
* Update FileTable.tsx
* Update the docker file
* Modified prompt (#438)
* Update Dockerfile
* Update Dockerfile
* Update Dockerfile
* rendering Fix
* Local file upload gcs (#442)
* Uplaod file to GCS
* GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled
* Add life cycle rule on uploaded bucket
* pdf upload local and gcs bucket check
* delete files when processed and extract changes
---------
Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com>
* Modified chat length and entities used (#443)
* metadata for unstructured files (#446)
* Unstructured file metadata (#447)
* metadata for unstructured files
* sleep in gcs upload
* updated
* icons added to chunks (#435)
* icons added to chunks
* info modal icons
* Dev (#433)
---------
Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com>
Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com>
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com>
Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com>
Co-authored-by: Ajay Meena
Co-authored-by: Morgan Senechal
Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com>
* fixed gcs status message issue
* added if check for failed count
* Null issue Fixed from backend for upload API and graph_document when model name mismatch
* added word break issue
* Added neo4j-rust-ext
* processing time estimation based on bytes
* File extension upper case fixed, File delete from GCS or local based on env variable.
* timer per byte
* Update Dockerfile
* Adding sort rows on the table (#451)
* Gcs upload folder hashed (#453)
* implement hashed folder name in GCS bucket upload
* Raise exception if invalid model selected
* folder name for gcs upload
---------
Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com>
* upload all unstructured files to gcs (#455)
* Modified chunk query (#454)
* Added LibreOffice for fixing error -- soffice command was not found. Please install libreoffice
on your system and try again.
- Install instructions: https://www.libreoffice.org/get-help/install-howto/
- Mac: https://formulae.brew.sh/cask/libreoffice
- Debian: https://wiki.debian.org/LibreOffice
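A minimal sketch of the corresponding Dockerfile addition, assuming a Debian-based backend image (the exact line is not shown in this patch):

    # Install LibreOffice so the soffice-based document converters are available (assumed form of the change)
    RUN apt-get update && apt-get install -y libreoffice && rm -rf /var/lib/apt/lists/*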
* Fix the PARTIAL CONTENT issue
* File-table no data found (#456)
* file-table
* review comment
* Llm format change (#459)
* changed the llm models format to lowercase
* added the error message
* llm model changes
* format fixes
* removed unused import
* added the capitalize method
* delete files from merged_file_path only if source is local file
---------
Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com>
* commented out total page code (#460)
* format fixes
* removed the disabled check on dropdown
* Large file env
* DEV to STAGING (#461)
* Integration_qa test (#375)
* Test IntegrationQA added
* update test cases
* update test
* update node count assertions
* test changes
* update changes
* modification test
* Code refactor test cases
* Handle allowlist issue in test
* test changes
* update test
* test case execution
* test chatbot updates
* test case update file
* added file
---------
Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com>
* recent merges
* pdf deletion due to out of disk space
* fixed status blank issue
* Rendering the file name instead of link for gcs and s3 sources in the info modal
* Convert is_cancelled value from string to bool
* added the default page size
* Fixed issue: Processed chunks shown as 0 when file is re-processed
* Youtube timestamps (#386)
* Wikipedia source to accept all valid urls
* wikipedia url to support multiple languages
* integrated wiki language param for extract api
* Youtube video timestamps
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* groq llm integration backend (#286)
* groq llm integration backend
* groq and description in node properties
* added groq in options
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* offset in chunks (#389)
* page number in gcs loader (#393)
* added youtube timestamps (#392)
* chat pop up button (#387)
* expand
* minimize-icon
* css changes
* chat history
* chatbot wider Side Nav
* expand icon
* chatbot UI
* Delete
* merge fixes
* code suggestions
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* chunks created before extraction using is_pre_process variable (#383)
* chunks created before extraction using is_pre_process variable
* Return total pages for Model
* update requirement.txt
* total pages on upload API
* added the Confirmation Dialog
* added the selected files into the confirmation modal
* format and lint fixes
* added the stop watch image
* file selection on alert dialog
* Add timeout in docker for gunicorn workers
* Add cancel icon to info popup (#384)
* Info Modal Changes
* css changes
* recent merges
* Save Total Pages in DB
* Added total Pages
* file selection when we didn't select anything from Main table
* added the danger icon only for large files
* added the overflow for more files and file selection for all new files
* moved the interface to types
* added the icon according to the source
* set total page for wiki and youtube
* h3 heading
* merge
* updated the alert on the basis of total pages
* deleted chunks
* polling based on total pages
* isNaN check
* large file based on file size for s3 and gcs
* file source in server side event
* time calculation based on chunks for gcs and s3
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com>
Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com>
Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com>
* fixed the layout issue
* Populate graph schema (#399)
* create new endpoint populate_graph_schema and update the query for getting labels from DB
* Added main.py changes
* conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396)
* added the condition
* removed llms
* Fixed issue: Remove extra unused param
* get embeddings only if used (#278)
* Chatbot chunks (#402)
* Added file name to the content sent to LLM
* added chunk text in the response
* increased the doc parts sent to the LLM
* Modified graph query
* markdown rendering
* youtube starttime
* icons
* offset changes
* removed the files due to codespace space issue
---------
Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com>
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* Settings modal to support generating the labels from the llm by using text given by user (#405)
* added the json
* added schema from text dialog
* integrated the schemaAPI
* added the alert
* resize fixes
* fixed css issue
* fixed status blank issue
* Modified response when no docs are retrieved (#413)
* Fixed env/docker-compose for local deployments + README doc (#410)
* Fixed env/docker-compose for local deployments + README doc
* wrong place for ENV in README
* by default, removed langsmith + fixed knn score string to float
* Fixed strings in docker-compose env
* Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop)
* Missed the TIME_PER_PAGE env, which was causing a NaN issue in the approx processing-time notification; fixed that
* Support for all unstructured files (#401)
* all unstructured files
* responsiveness
* added file type
* added the extensions
* spelling mistake
* ppt file changes
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415)
* added the json
* added schema from text dialog
* integrated the schemaAPI
* added the alert
* resize fixes
* Extract schema using direct ChatOpenAI API and Chain
* integrated the checkbox for schema to text dialog
* Update SettingModal.tsx
---------
Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com>
* gcs file content read via storage client (#417)
* gcs file content read via storage client
* added the access token to the file state
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* pypdf2 to read files from gcs (#420)
* 407 remove driver from frontend (#416)
* removed driver
* removed API
* connecting to database on page refresh
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* Css handling of info modal and Tooltips (#418)
* css change
* toolTips
* Sidebar Tooltips
* copy to clipboard
* css change
* added image types
* added gcs
* type fix
* docker changes
* speech
* added the tooltip for dropzone sources
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
---------
Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com>
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com>
Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com>
Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com>
Co-authored-by: Ajay Meena
Co-authored-by: Morgan Senechal
Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com>
* DEV to STAGING (#462)
---------
Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com>
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com>
Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com>
Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com>
Co-authored-by: Ajay Meena
Co-authored-by: Morgan Senechal
Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com>
* added upload api
* changed the dropzone error message
* Dev to staging (#466)
---------
Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com>
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com>
Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com>
Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com>
Co-authored-by: Ajay Meena
Co-authored-by: Morgan Senechal
Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com>
* format fixes
* Close connection when graph object is not None
* Call garbage collector to release the memory
* Change error message
* Added driver config as user_agent
* Updated doc for the LLM_MODELS and GCS_FILE_CACHE (#473)
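A minimal sketch of the two documented variables, with assumed example values (the names come from the commit subject; values and comments are illustrative only, not taken from this patch):

    # .env (backend) -- illustrative values
    LLM_MODELS="diffbot,openai-gpt-3.5,openai-gpt-4o"  # comma-separated list of enabled models (assumed format)
    GCS_FILE_CACHE=False                               # cache uploaded files in GCS instead of local disk (assumed semantics)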
* Web URLs are user input (#475)
* web URL support in backend
* added the tabs for input source
* user agent added for Neo4jGraph connection
* Tab view for sources
* extract handling for web URLs
* initial input handling
* chunk creation before processing
* code structure
* format fixes
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* changed the regex for web and cancel button naming
* changed the schema dropdown type
* readme updates
* PROD version fix
* changed the alert message for gcs
* Delete unconnected entities from DB (#482)
* 457 add schema before generate graph (#478)
* schema setting from generate graph
* changes
* changes
* badge changes
* bug fix
* Fulltext index and Update similarity graph (#479)
* added full_text index
* added one common function for post_processing
* post processing api
* added tasks param
* modified logging
* post processing changes
---------
Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com>
* Graph and vector search (#485)
* Modified the retrieval query
* added the chatmode toggle component
* Modified to vector search
* Moved the templates to constants
* added the icons
* added chat modes
* code structure changes
* Integrated the API changes
* Modified retrieval queries, refactored code
* API integration changes
* added the score
* order change
* wording change
* modified constants
* added graph+vector
* added the tooltips
* Modified query
* removed the graph mode
* tooltip camelCase
* added the icon and extern link for web source in the info modal
* added the youtube link in the source used tab
* format fixes
* added the hoverable link
---------
Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com>
* Update InfoModal.tsx
* removed hover from chunks
* remove…
* lint fixes
* Fix typo: correct 'josn_obj' to 'json_obj' (#697)
* Staging To Main (#495)
* Dev (#433)
* Integration_qa test (#375)
* Test IntegrationQA added
* update test cases
* update test
* update node count assertions
* test changes
* update changes
* modification test
* Code refatctor test cases
* Handle allowedlist issue in test
* test changes
* update test
* test case execution
* test chatbot updates
* test case update file
* added file
---------
Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com>
* recent merges
* pdf deletion due to out of diskspace
* fixed status blank issue
* Rendering the file name instead of link for gcs and s3 sources in the info modal
* Convert is_cancelled value from string to bool
* added the default page size
* Issue fixed Processed chunked as 0 when file re-process again
* Youtube timestamps (#386)
* Wikipedia source to accept all valid urls
* wikipedia url to support multiple languages
* integrated wiki langauge param for extract api
* Youtube video timestamps
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* groq llm integration backend (#286)
* groq llm integration backend
* groq and description in node properties
* added groq in options
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* offset in chunks (#389)
* page number in gcs loader (#393)
* added youtube timestamps (#392)
* chat pop up button (#387)
* expand
* minimize-icon
* css changes
* chat history
* chatbot wider Side Nav
* expand icon
* chatbot UI
* Delete
* merge fixes
* code suggestions
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* chunks create before extraction using is_pre_process variable (#383)
* chunks create before extraction using is_pre_process variable
* Return total pages for Model
* update requirement.txt
* total pages on uplaod API
* added the Confirmation Dialog
* added the selected files into the confirmation modal
* format and lint fixes
* added the stop watch image
* fileselection on alert dialog
* Add timeout in docker for gunicorn workers
* Add cancel icon to info popup (#384)
* Info Modal Changes
* css changes
* recent merges
* Integration_qa test (#375)
* Test IntegrationQA added
* update test cases
* update test
* update node count assertions
* test changes
* update changes
* modification test
* Code refatctor test cases
* Handle allowedlist issue in test
* test changes
* update test
* test case execution
* test chatbot updates
* test case update file
* added file
---------
Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com>
* fixed status blank issue
* Rendering the file name instead of link for gcs and s3 sources in the info modal
* added the default page size
* Convert is_cancelled value from string to bool
* Issue fixed Processed chunked as 0 when file re-process again
* Youtube timestamps (#386)
* Wikipedia source to accept all valid urls
* wikipedia url to support multiple languages
* integrated wiki langauge param for extract api
* Youtube video timestamps
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* groq llm integration backend (#286)
* groq llm integration backend
* groq and description in node properties
* added groq in options
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* Save Total Pages in DB
* Added total Pages
* file selection when we didn't select anything from Main table
* added the danger icon only for large files
* added the overflow for more files and file selection for all new files
* moved the interface to types
* added the icon accoroding to the source
* set total page for wiki and youtube
* h3 heading
* merge
* updated the alert on basis if total pages
* deleted chunks
* polling based on total pages
* isNan check
* large file based on file size for s3 and gcs
* file source in server side event
* time calculation based on chunks for gcs and s3
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com>
Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com>
Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com>
* fixed the layout issue
* Populate graph schema (#399)
* crreate new endpoint populate_graph_schema and update the query for getting lables from DB
* Added main.py changes
* conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396)
* added the condtion
* removed llms
* Fixed issue : Remove extra unused param
* get emb only if used (#278)
* Chatbot chunks (#402)
* Added file name to the content sent to LLM
* added chunk text in the response
* increased the docs parts sent to llm
* Modified graph query
* mardown rendering
* youtube starttime
* icons
* offset changes
* removed the files due to codespace space issue
---------
Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com>
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* Settings modal to support generating the labels from the llm by using text given by user (#405)
* added the json
* added schema from text dialog
* integrated the schemaAPI
* added the alert
* resize fixes
* fixed css issue
* fixed status blank issue
* Modified response when no docs is retrived (#413)
* Fixed env/docker-compose for local deployments + README doc (#410)
* Fixed env/docker-compose for local deployments + README doc
* wrong place for ENV in README
* by default, removed langsmith + fixed knn score string to float
* by default, removed langsmith + fixed knn score string to float
* Fixed strings in docker-compose env
* Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop)
* Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that
* Support for all unstructured files (#401)
* all unstructured files
* responsiveness
* added file type
* added the extensions
* spell mistake
* ppt file changes
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415)
* added the json
* added schema from text dialog
* integrated the schemaAPI
* added the alert
* resize fixes
* Extract schema using direct ChatOpenAI API and Chain
* integrated the checkbox for schema to text dialog
* Update SettingModal.tsx
---------
Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com>
* gcs file content read via storage client (#417)
* gcs file content read via storage client
* added the access token the file state
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* pypdf2 to read files from gcs (#420)
* 407 remove driver from frontend (#416)
* removed driver
* removed API
* connecting to database on page refresh
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* Css handling of info modal and Tooltips (#418)
* css change
* toolTips
* Sidebar Tooltips
* copy to clip
* css change
* added image types
* added gcs
* type fix
* docker changes
* speech
* added the toolip for dropzone sources
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* Fixed retrieval bugs (#421)
* yarn format fixes
* changed the delete message
* added the cancel button
* changed the message on tooltip
* added space
* UI fixes
* tooltip for setting
* updated req
* wikipedia URL input (#424)
* accept only wikipedia links
* added wikipedia link
* added wikilink regex
* wikipedia single url only
* changed the alert message
* wording change
* pushed validation state persist error
---------
Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com>
* speech and copy (#422)
* speech and copy
* startTime
* added chunk properties
* tooltips
---------
Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com>
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* Fixed issue for out of range in KNN API
* solved conflicts
* conflict solved
* Remove logging info from update KNN API
* tooltip changes
* format and lint fixes
* responsiveness changes
* Fixed issue for total pages GCS, S3
* UI polishing (#428)
* button and tooltip changes
* checking validation on change
* settings module populate fix
* format fixes
* opening the modal after auth success
* removed the limit
* added the scrollbar for dropdowns
* speech state (#426)
* speech state
* Button Details changes
* delete wording change
* Total pages in buckets (#431)
* page number NA for buckets
* added N/A for gcs and s3 pages
* total pages for gcs
* remove unwanted logger
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* removed the max width
* Update FileTable.tsx
* Update the docker file
* Modified prompt (#438)
* Update Dockerfile
* Update Dockerfile
* Update Dockerfile
* rendering Fix
* Local file upload gcs (#442)
* Upload file to GCS
* Fixed GCS local upload issue; delete file from GCS after processing and on failure or cancellation
* Add life cycle rule on uploaded bucket
* pdf upload local and gcs bucket check
* delete files when processed and extract changes
---------
Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com>
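Adding a life cycle rule on the upload bucket would look roughly like this with google-cloud-storage; the 7-day age is an assumed value, not the repo's actual setting:

    from google.cloud import storage

    def add_upload_lifecycle_rule(bucket_name: str, days: int = 7) -> None:
        client = storage.Client()
        bucket = client.get_bucket(bucket_name)
        # Auto-delete uploaded objects once they are `days` old.
        bucket.add_lifecycle_delete_rule(age=days)
        bucket.patch()  # persist the updated lifecycle configuration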
* Modified chat length and entities used (#443)
* metadata for unstructured files (#446)
* Unstructured file metadata (#447)
* metadata for unstructured files
* sleep in gcs upload
* updated
* icons added to chunks (#435)
* icons added to chunks
* info modal icons
---------
Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com>
Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com>
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com>
Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com>
Co-authored-by: Ajay Meena
Co-authored-by: Morgan Senechal
Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com>
* fixed gcs status message issue
* added if check for failed count
* Fixed null issue from backend for upload API and graph_document when model name mismatches
* added word break issue
* Added neo4j-rust-ext
* processing time estimation based on bytes
* Fixed upper-case file extension handling; file deleted from GCS or local based on env variable
* timer per byte
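The byte-based estimate amounts to multiplying the file size by a tuned seconds-per-byte constant; the constant and helper below are illustrative only:

    def estimate_processing_time(file_size_bytes: int,
                                 time_per_byte: float = 0.0002) -> str:
        # Rough upfront estimate surfaced in the UI for large files;
        # the per-byte factor would be tuned empirically, not fixed here.
        minutes, secs = divmod(int(file_size_bytes * time_per_byte), 60)
        return f"~{minutes}m {secs}s"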
* Update Dockerfile
* Adding sort rows on the table (#451)
* Gcs upload folder hashed (#453)
* implement hashed folder name in GCS bucket upload
* Raise exception if invalid model selected
* folder name for gcs upload
---------
Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com>
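Hashing the upload folder name keeps concurrent uploads of identically named files from colliding in the bucket. A sketch; the exact hash inputs (name plus timestamp) are an assumption:

    import hashlib
    import time

    def hashed_upload_folder(file_name: str) -> str:
        digest = hashlib.sha1(f"{file_name}-{time.time()}".encode()).hexdigest()
        # Object path becomes <hash>/<original-name> inside the bucket.
        return f"{digest}/{file_name}"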
* upload all unstructured files to gcs (#455)
* Modified chunk query (#454)
* Added LibreOffice to fix error -- soffice command was not found. Please install libreoffice
on your system and try again.
- Install instructions: https://www.libreoffice.org/get-help/install-howto/
- Mac: https://formulae.brew.sh/cask/libreoffice
- Debian: https://wiki.debian.org/LibreOffice
* Fix the PARTIAL CONTENT issue
* File-table no data found (#456)
* file-table
* review comment
* Llm format change (#459)
* changed the llm models format to lowercase
* added the error message
* llm model changes
* format fixes
* removed unused import
* added the capitalize method
* delete files from merged_file_path only if source is local file
---------
Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com>
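Deleting from merged_file_path only for local sources might look like this; the "local file" source-type literal is an assumption:

    import os

    def cleanup_merged_file(merged_file_path: str, source_type: str) -> None:
        # GCS/S3 sources are cleaned up via their own storage APIs, so only
        # files merged onto local disk are removed here.
        if source_type == "local file" and os.path.exists(merged_file_path):
            os.remove(merged_file_path)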
* commented out total page code (#460)
* format fixes
* removed the disabled check on dropdown
* Large file env
* DEV to STAGING (#461)
---------
Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com>
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com>
Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com>
Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com>
Co-authored-by: Ajay Meena
Co-authored-by: Morgan Senechal
Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com>
* DEV to STAGING (#462)
---------
Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com>
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com>
Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com>
Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com>
Co-authored-by: Ajay Meena
Co-authored-by: Morgan Senechal
Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com>
* added upload api
* changed the dropzone error message
* Dev to staging (#466)
---------
Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com>
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com>
Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com>
Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com>
Co-authored-by: Ajay Meena
Co-authored-by: Morgan Senechal
Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com>
* format fixes
* Close connection when graph object is not None
* Call garbage collector to release the memory
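Those two cleanup commits combine into a small teardown step; the `_driver` attribute name on the graph object is an assumption:

    import gc

    def release_graph(graph) -> None:
        # Close the underlying Neo4j driver when the graph object exists,
        # then nudge the garbage collector to reclaim extraction memory.
        if graph is not None:
            graph._driver.close()
        gc.collect()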
* Change error message
* Added driver config as user_agent
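The Neo4j Python driver accepts user_agent as a config option, which lets the application identify itself in server logs; the URI, credentials, and agent string below are placeholders:

    from neo4j import GraphDatabase

    driver = GraphDatabase.driver(
        "neo4j://localhost:7687",        # placeholder URI
        auth=("neo4j", "password"),      # placeholder credentials
        user_agent="llm-graph-builder",  # illustrative agent string
    )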
* Updated doc for the LLM_MODELS and GCS_FILE_CACHE (#473)
* Web URLs are user input (#475)
* web url support backend
* added the tabs for input source
* user agent added for Neo4jGraph connection
* Tab view for sources
* extract handling for web URLs
* initial input handling
* chunk creation before processing
* code structure
* format fixes
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* changed the regex for web and cancel button naming
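An illustrative shape for such a web-URL regex (not the exact pattern this change landed): accept http(s) URLs with a dotted hostname, reject bare words:

    import re

    WEB_URL_REGEX = re.compile(
        r"^https?://[\w.-]+(?:\.[\w.-]+)+[/\w\-._~:/?#[\]@!$&'()*+,;=%]*$"
    )

    def is_valid_web_url(url: str) -> bool:
        # Validate user input before handing the URL to extraction.
        return bool(WEB_URL_REGEX.match(url.strip()))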
* changed the schema dropdown type
* readme updates
* PROD version fix
* changed the alert message for gcs
* Delete unconnected entities from DB (#482)
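One plausible shape for that cleanup query, assuming extracted entities carry an __Entity__-style label (the label names are assumptions, not the shipped query):

    DELETE_UNCONNECTED = """
    MATCH (e:__Entity__)
    WHERE NOT (e)--(:Chunk)   // entity no longer referenced by any chunk
    DETACH DELETE e
    """

    def delete_unconnected_entities(driver) -> None:
        with driver.session() as session:
            session.run(DELETE_UNCONNECTED)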
* 457 add schema before generate graph (#478)
* schema setting from generate graph
* changes
* changes
* badge changes
* bug fix
* Fulltext index and Update similarity graph (#479)
* added full_text index
* added one common function for post_processing
* post processing api
* added tasks param
* modified logging
* post processing changes
---------
Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com>
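The full_text index portion of the post_processing tasks reduces to one piece of schema DDL; the index name and indexed property below are assumptions:

    CREATE_FULLTEXT = """
    CREATE FULLTEXT INDEX keyword IF NOT EXISTS
    FOR (n:Chunk) ON EACH [n.text]
    """

    def create_fulltext_index(driver) -> None:
        # Run once as a post-processing task so fulltext chat modes have
        # a keyword index over chunk text to query.
        with driver.session() as session:
            session.run(CREATE_FULLTEXT)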
* Graph and vector search (#485)
* Modified the retrieval query
* added the chatmode toggle component
* Modified to vector search
* Moved the templates to constants
* added the icons
* added chat modes
* code structure changes
* Integrated the API changes
* Modified retrieval queries, refactored code
* API integration changes
* added the score
* order change
* wording change
* modified constants
* added graph+vector
* added the tooltips
* Modified query
* removed the graph mode
* tooltip camel Case
* added the icon and external link for web source in the info modal
* added the youtube link in the source used tab
* format fixes
* added the hoverable link
---------
Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com>
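A simplified sketch of a combined graph+vector retrieval query of the kind this PR modifies; the index, relationship, and property names here are assumptions, not the shipped query:

    RETRIEVAL_QUERY = """
    CALL db.index.vector.queryNodes('vector', $k, $embedding)
    YIELD node AS chunk, score
    MATCH (chunk)-[:PART_OF]->(d:Document)
    OPTIONAL MATCH (chunk)-[:HAS_ENTITY]->(e)
    RETURN chunk.text AS text, score, d.fileName AS source,
           collect(DISTINCT e.id) AS entities
    ORDER BY score DESC
    """

    def retrieve(driver, embedding: list, k: int = 5):
        # Vector search finds the top-k chunks, then the graph expands each
        # chunk to its source document and linked entities.
        with driver.session() as session:
            return [r.data() for r in session.run(RETRIEVAL_QUERY,
                                                  k=k, embedding=embedding)]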
* Update InfoModal.tsx
* removed hover from chunks
* remove…
* lint fixes
* connection _check
* Fix typo: correct 'josn_obj' to 'json_obj' (#697)
* Staging To Main (#495)
* Integration_qa test (#375)
* Test IntegrationQA added
* update test cases
* update test
* update node count assertions
* test changes
* update changes
* modification test
* Code refatctor test cases
* Handle allowedlist issue in test
* test changes
* update test
* test case execution
* test chatbot updates
* test case update file
* added file
---------
Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com>
* recent merges
* pdf deletion due to out of diskspace
* fixed status blank issue
* Rendering the file name instead of link for gcs and s3 sources in the info modal
* Convert is_cancelled value from string to bool
* added the default page size
* Issue fixed Processed chunked as 0 when file re-process again
* Youtube timestamps (#386)
* Wikipedia source to accept all valid urls
* wikipedia url to support multiple languages
* integrated wiki langauge param for extract api
* Youtube video timestamps
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* groq llm integration backend (#286)
* groq llm integration backend
* groq and description in node properties
* added groq in options
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* offset in chunks (#389)
* page number in gcs loader (#393)
* added youtube timestamps (#392)
* chat pop up button (#387)
* expand
* minimize-icon
* css changes
* chat history
* chatbot wider Side Nav
* expand icon
* chatbot UI
* Delete
* merge fixes
* code suggestions
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* chunks create before extraction using is_pre_process variable (#383)
* chunks create before extraction using is_pre_process variable
* Return total pages for Model
* update requirement.txt
* total pages on uplaod API
* added the Confirmation Dialog
* added the selected files into the confirmation modal
* format and lint fixes
* added the stop watch image
* fileselection on alert dialog
* Add timeout in docker for gunicorn workers
* Add cancel icon to info popup (#384)
* Info Modal Changes
* css changes
* recent merges
* Integration_qa test (#375)
* Test IntegrationQA added
* update test cases
* update test
* update node count assertions
* test changes
* update changes
* modification test
* Code refatctor test cases
* Handle allowedlist issue in test
* test changes
* update test
* test case execution
* test chatbot updates
* test case update file
* added file
---------
Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com>
* fixed status blank issue
* Rendering the file name instead of link for gcs and s3 sources in the info modal
* added the default page size
* Convert is_cancelled value from string to bool
* Issue fixed Processed chunked as 0 when file re-process again
* Youtube timestamps (#386)
* Wikipedia source to accept all valid urls
* wikipedia url to support multiple languages
* integrated wiki langauge param for extract api
* Youtube video timestamps
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* groq llm integration backend (#286)
* groq llm integration backend
* groq and description in node properties
* added groq in options
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* Save Total Pages in DB
* Added total Pages
* file selection when we didn't select anything from Main table
* added the danger icon only for large files
* added the overflow for more files and file selection for all new files
* moved the interface to types
* added the icon accoroding to the source
* set total page for wiki and youtube
* h3 heading
* merge
* updated the alert on basis if total pages
* deleted chunks
* polling based on total pages
* isNan check
* large file based on file size for s3 and gcs
* file source in server side event
* time calculation based on chunks for gcs and s3
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com>
Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com>
Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com>
* fixed the layout issue
* Populate graph schema (#399)
* crreate new endpoint populate_graph_schema and update the query for getting lables from DB
* Added main.py changes
* conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396)
* added the condtion
* removed llms
* Fixed issue : Remove extra unused param
* get emb only if used (#278)
* Chatbot chunks (#402)
* Added file name to the content sent to LLM
* added chunk text in the response
* increased the docs parts sent to llm
* Modified graph query
* mardown rendering
* youtube starttime
* icons
* offset changes
* removed the files due to codespace space issue
---------
Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com>
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* Settings modal to support generating the labels from the llm by using text given by user (#405)
* added the json
* added schema from text dialog
* integrated the schemaAPI
* added the alert
* resize fixes
* fixed css issue
* fixed status blank issue
* Modified response when no docs is retrived (#413)
* Fixed env/docker-compose for local deployments + README doc (#410)
* Fixed env/docker-compose for local deployments + README doc
* wrong place for ENV in README
* by default, removed langsmith + fixed knn score string to float
* by default, removed langsmith + fixed knn score string to float
* Fixed strings in docker-compose env
* Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop)
* Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that
* Support for all unstructured files (#401)
* all unstructured files
* responsiveness
* added file type
* added the extensions
* spell mistake
* ppt file changes
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415)
* added the json
* added schema from text dialog
* integrated the schemaAPI
* added the alert
* resize fixes
* Extract schema using direct ChatOpenAI API and Chain
* integrated the checkbox for schema to text dialog
* Update SettingModal.tsx
---------
Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com>
* gcs file content read via storage client (#417)
* gcs file content read via storage client
* added the access token the file state
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* pypdf2 to read files from gcs (#420)
* 407 remove driver from frontend (#416)
* removed driver
* removed API
* connecting to database on page refresh
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* Css handling of info modal and Tooltips (#418)
* css change
* toolTips
* Sidebar Tooltips
* copy to clip
* css change
* added image types
* added gcs
* type fix
* docker changes
* speech
* added the toolip for dropzone sources
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* Fixed retrival bugs (#421)
* yarn format fixes
* changed the delete message
* added the cancel button
* changed the message on tooltip
* added space
* UI fixes
* tooltip for setting
* updated req
* wikipedia URL input (#424)
* accept only wikipedia links
* added wikipedia link
* added wikilink regex
* wikipedia single url only
* changed the alert message
* wording change
* pushed validation state persist error
---------
Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com>
* speech and copy (#422)
* speech and copy
* startTime
* added chunk properties
* tooltips
---------
Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com>
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* Fixed issue for out of range in KNN API
* solved conflicts
* conflict solved
* Remove logging info from update KNN API
* tooltip changes
* format and lint fixes
* responsiveness changes
* Fixed issue for total pages GCS, S3
* UI polishing (#428)
* button and tooltip changes
* checking validation on change
* settings module populate fix
* format fixes
* opening the modal after auth success
* removed the limit
* added the scrobar for dropdowns
* speech state (#426)
* speech state
* Button Details changes
* delete wording change
* Total pages in buckets (#431)
* page number NA for buckets
* added N/A for gcs and s3 pages
* total pages for gcs
* remove unwanted logger
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* removed the max width
* Update FileTable.tsx
* Update the docker file
* Modified prompt (#438)
* Update Dockerfile
* Update Dockerfile
* Update Dockerfile
* rendering Fix
* Local file upload gcs (#442)
* Uplaod file to GCS
* GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled
* Add life cycle rule on uploaded bucket
* pdf upload local and gcs bucket check
* delete files when processed and extract changes
---------
Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com>
* Modified chat length and entities used (#443)
* metadata for unstructured files (#446)
* Unstructured file metadata (#447)
* metadata for unstructured files
* sleep in gcs upload
* updated
* icons added to chunks (#435)
* icons added to chunks
* info modal icons
* Dev (#433)
* Integration_qa test (#375)
* Test IntegrationQA added
* update test cases
* update test
* update node count assertions
* test changes
* update changes
* modification test
* Code refatctor test cases
* Handle allowedlist issue in test
* test changes
* update test
* test case execution
* test chatbot updates
* test case update file
* added file
---------
Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com>
* recent merges
* pdf deletion due to out of diskspace
* fixed status blank issue
* Rendering the file name instead of link for gcs and s3 sources in the info modal
* Convert is_cancelled value from string to bool
* added the default page size
* Issue fixed Processed chunked as 0 when file re-process again
* Youtube timestamps (#386)
* Wikipedia source to accept all valid urls
* wikipedia url to support multiple languages
* integrated wiki langauge param for extract api
* Youtube video timestamps
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* groq llm integration backend (#286)
* groq llm integration backend
* groq and description in node properties
* added groq in options
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* offset in chunks (#389)
* page number in gcs loader (#393)
* added youtube timestamps (#392)
* chat pop up button (#387)
* expand
* minimize-icon
* css changes
* chat history
* chatbot wider Side Nav
* expand icon
* chatbot UI
* Delete
* merge fixes
* code suggestions
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* chunks create before extraction using is_pre_process variable (#383)
* chunks create before extraction using is_pre_process variable
* Return total pages for Model
* update requirement.txt
* total pages on uplaod API
* added the Confirmation Dialog
* added the selected files into the confirmation modal
* format and lint fixes
* added the stop watch image
* fileselection on alert dialog
* Add timeout in docker for gunicorn workers
* Add cancel icon to info popup (#384)
* Info Modal Changes
* css changes
* recent merges
* Integration_qa test (#375)
* Test IntegrationQA added
* update test cases
* update test
* update node count assertions
* test changes
* update changes
* modification test
* Code refatctor test cases
* Handle allowedlist issue in test
* test changes
* update test
* test case execution
* test chatbot updates
* test case update file
* added file
---------
Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com>
* fixed status blank issue
* Rendering the file name instead of link for gcs and s3 sources in the info modal
* added the default page size
* Convert is_cancelled value from string to bool
* Issue fixed Processed chunked as 0 when file re-process again
* Youtube timestamps (#386)
* Wikipedia source to accept all valid urls
* wikipedia url to support multiple languages
* integrated wiki langauge param for extract api
* Youtube video timestamps
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* groq llm integration backend (#286)
* groq llm integration backend
* groq and description in node properties
* added groq in options
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* Save Total Pages in DB
* Added total Pages
* file selection when we didn't select anything from Main table
* added the danger icon only for large files
* added the overflow for more files and file selection for all new files
* moved the interface to types
* added the icon accoroding to the source
* set total page for wiki and youtube
* h3 heading
* merge
* updated the alert on basis if total pages
* deleted chunks
* polling based on total pages
* isNan check
* large file based on file size for s3 and gcs
* file source in server side event
* time calculation based on chunks for gcs and s3
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com>
Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com>
Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com>
* fixed the layout issue
* Populate graph schema (#399)
* create new endpoint populate_graph_schema and update the query for getting labels from DB
* Added main.py changes
* conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396)
* added the condition
* removed llms
* Fixed issue: Remove extra unused param
* get emb only if used (#278)
* Chatbot chunks (#402)
* Added file name to the content sent to LLM
* added chunk text in the response
* increased the docs parts sent to llm
* Modified graph query
* markdown rendering
* youtube starttime
* icons
* offset changes
* removed the files due to codespace space issue
---------
Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com>
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* Settings modal to support generating the labels from the llm by using text given by user (#405)
* added the json
* added schema from text dialog
* integrated the schemaAPI
* added the alert
* resize fixes
* fixed css issue
* fixed status blank issue
* Modified response when no docs are retrieved (#413)
* Fixed env/docker-compose for local deployments + README doc (#410)
* Fixed env/docker-compose for local deployments + README doc
* wrong place for ENV in README
* by default, removed langsmith + fixed knn score string to float
* by default, removed langsmith + fixed knn score string to float
* Fixed strings in docker-compose env
* Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop)
* Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that
* Support for all unstructured files (#401)
* all unstructured files
* responsiveness
* added file type
* added the extensions
* spelling mistake
* ppt file changes
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415)
* added the json
* added schema from text dialog
* integrated the schemaAPI
* added the alert
* resize fixes
* Extract schema using direct ChatOpenAI API and Chain
* integrated the checkbox for schema to text dialog
* Update SettingModal.tsx
---------
Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com>
* gcs file content read via storage client (#417)
* gcs file content read via storage client
* added the access token to the file state
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* pypdf2 to read files from gcs (#420)
* 407 remove driver from frontend (#416)
* removed driver
* removed API
* connecting to database on page refresh
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* Css handling of info modal and Tooltips (#418)
* css change
* toolTips
* Sidebar Tooltips
* copy to clip
* css change
* added image types
* added gcs
* type fix
* docker changes
* speech
* added the tooltip for dropzone sources
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* Fixed retrieval bugs (#421)
* yarn format fixes
* changed the delete message
* added the cancel button
* changed the message on tooltip
* added space
* UI fixes
* tooltip for setting
* updated req
* wikipedia URL input (#424)
* accept only wikipedia links
* added wikipedia link
* added wikilink regex
* wikipedia single url only
* changed the alert message
* wording change
* pushed validation state persist error
---------
Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com>
* speech and copy (#422)
* speech and copy
* startTime
* added chunk properties
* tooltips
---------
Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com>
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* Fixed issue for out of range in KNN API
* solved conflicts
* conflict solved
* Remove logging info from update KNN API
* tooltip changes
* format and lint fixes
* responsiveness changes
* Fixed issue for total pages GCS, S3
* UI polishing (#428)
* button and tooltip changes
* checking validation on change
* settings module populate fix
* format fixes
* opening the modal after auth success
* removed the limit
* added the scrollbar for dropdowns
* speech state (#426)
* speech state
* Button Details changes
* delete wording change
* Total pages in buckets (#431)
* page number NA for buckets
* added N/A for gcs and s3 pages
* total pages for gcs
* remove unwanted logger
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* removed the max width
* Update FileTable.tsx
* Update the docker file
* Modified prompt (#438)
* Update Dockerfile
* Update Dockerfile
* Update Dockerfile
* rendering Fix
* Local file upload gcs (#442)
* Upload file to GCS
* GCS local upload fixed issue and delete file from GCS after processing and failed or cancelled
* Add life cycle rule on uploaded bucket
* pdf upload local and gcs bucket check
* delete files when processed and extract changes
---------
Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com>
* Modified chat length and entities used (#443)
* metadata for unstructured files (#446)
* Unstructured file metadata (#447)
* metadata for unstructured files
* sleep in gcs upload
* updated
* icons added to chunks (#435)
* icons added to chunks
* info modal icons
---------
Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com>
Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com>
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com>
Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com>
Co-authored-by: Ajay Meena
Co-authored-by: Morgan Senechal
Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com>
* fixed gcs status message issue
* added if check for failed count
* Fixed null issue in backend for upload API and graph_document when model name mismatches
* added word break issue
* Added neo4j-rust-ext
* processing time estimation based on bytes
* File extension upper case fixed, File delete from GCS or local based on env variable.
* timer per byte
* Update Dockerfile
* Adding sort rows on the table (#451)
* Gcs upload folder hashed (#453)
* implement folder name hashing in GCS bucket upload
* Raise exception if invalid model selected
* folder name for gcs upload
---------
Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com>
* upload all unstructured files to gcs (#455)
* Modified chunk query (#454)
* Added libreoffice to fix the error "soffice command was not found. Please install libreoffice on your system and try again."
- Install instructions: https://www.libreoffice.org/get-help/install-howto/
- Mac: https://formulae.brew.sh/cask/libreoffice
- Debian: https://wiki.debian.org/LibreOffice
* Fix the PARTIAL CONTENT issue
* File-table no data found (#456)
* file-table
* review comment
* Llm format change (#459)
* changed the llm models format to lowercase
* added the error message
* llm model changes
* format fixes
* removed unused import
* added the capitalize method
* delete files from merged_file_path only if source is local file
---------
Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com>
* commented total page code (#460)
* format fixes
* removed the disabled check on dropdown
* Large file env
* DEV to STAGING (#461)
---------
Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com>
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com>
Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com>
Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com>
Co-authored-by: Ajay Meena
Co-authored-by: Morgan Senechal
Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com>
* DEV to STAGING (#462)
---------
Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com>
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com>
Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com>
Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com>
Co-authored-by: Ajay Meena
Co-authored-by: Morgan Senechal
Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com>
* added upload api
* changed the dropzone error message
* Dev to staging (#466)
---------
Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com>
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com>
Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com>
Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com>
Co-authored-by: Ajay Meena
Co-authored-by: Morgan Senechal
Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com>
* format fixes
* Close connection when graph object is not None
* Call garbage collector to release the memory
* Change error message
* Added driver config as user_agent
* Updated doc for the LLM_MODELS and GCS_FILE_CACHE (#473)
* Web URLs are user input (#475)
* web url support backend
* added the tabs for input source
* user agent added for Neo4jGraph connection
* Tab view for sources
* extract handling for web urls
* initial input handling
* chunk creation before processing
* code structure
* format fixes
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* changed the regex for web and cancel button naming
* changed the schema dropdown type
* readme updates
* PROD version fix
* changed the alert message for gcs
* Delete unconnected entities from DB (#482)
* 457 add schema before generate graph (#478)
* schema setting from generate graph
* changes
* changes
* badge changes
* bug fix
* Fulltext index and Update similarity graph (#479)
* added full_text index
* added one common function for post_processing
* post processing api
* added tasks param
* modified logging
* post processing changes
---------
Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com>
* Graph and vector search (#485)
* Modified the retrieval query
* added the chatmode toggle component
* Modified to vector search
* Moved the templates to constants
* added the icons
* added chat modes
* code structure changes
* Integrated the API changes
* Modified retrieval queries,refactored code
* API integration changes
* added the score
* order change
* wording change
* modified constants
* added graph+vector
* added the tooltips
* Modified query
* removed the graph mode
* tooltip camelCase
* added the icon and external link for web source in the info modal
* added the youtube link in the source used tab
* format fixes
* added the hoverable link
---------
Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com>
* Update InfoModal.tsx
* removed hover from chunks
* remove…
* lint fixes
* connection _check
* Dev (#701)
---------
Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com>
* Chatbot changes (#700)
* added Hybrid search from graph
* modified mode params
* fixed issue delete entities return count
* removed specified version due to dependency clashes between versions
* updated script "integration test cases"
* decreased the delay for polling API
* Graph enhancements (#696)
* relationship Changes
* addition of relationship labels
* onclick to nodes
* node-highlight
* Build fixes
* slash docker change
* deactivating previous nodes/relationships
* lint fixes
* class issue
* search
* search on basis of id / captions
* debounce changes
* class changes (#693)
* legends highlight
* search query reset
* node size
* changed chat mode names (#702)
* DEV to STAGING (#703)
---------
Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com>
Co-authored-by: Pravesh1988
Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com>
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com>
* env changes
* used axios instance for network calls
* disabled the tooltip when dropdown is in open state
* format fixes + chat mode naming changes
* mode added to info model for entities
* Dev (#705)
* connection _check
* Fix typo: correct 'josn_obj' to 'json_obj' (#697)
* Staging To Main (#495)
* Integration_qa test (#375)
* Test IntegrationQA added
* update test cases
* update test
* update node count assertions
* test changes
* update changes
* modification test
* Code refatctor test cases
* Handle allowedlist issue in test
* test changes
* update test
* test case execution
* test chatbot updates
* test case update file
* added file
---------
Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com>
* recent merges
* pdf deletion due to out of diskspace
* fixed status blank issue
* Rendering the file name instead of link for gcs and s3 sources in the info modal
* Convert is_cancelled value from string to bool
* added the default page size
* Issue fixed Processed chunked as 0 when file re-process again
* Youtube timestamps (#386)
* Wikipedia source to accept all valid urls
* wikipedia url to support multiple languages
* integrated wiki langauge param for extract api
* Youtube video timestamps
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* groq llm integration backend (#286)
* groq llm integration backend
* groq and description in node properties
* added groq in options
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* offset in chunks (#389)
* page number in gcs loader (#393)
* added youtube timestamps (#392)
* chat pop up button (#387)
* expand
* minimize-icon
* css changes
* chat history
* chatbot wider Side Nav
* expand icon
* chatbot UI
* Delete
* merge fixes
* code suggestions
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* chunks create before extraction using is_pre_process variable (#383)
* chunks create before extraction using is_pre_process variable
* Return total pages for Model
* update requirement.txt
* total pages on uplaod API
* added the Confirmation Dialog
* added the selected files into the confirmation modal
* format and lint fixes
* added the stop watch image
* fileselection on alert dialog
* Add timeout in docker for gunicorn workers
* Add cancel icon to info popup (#384)
* Info Modal Changes
* css changes
* recent merges
* Integration_qa test (#375)
* Test IntegrationQA added
* update test cases
* update test
* update node count assertions
* test changes
* update changes
* modification test
* Code refatctor test cases
* Handle allowedlist issue in test
* test changes
* update test
* test case execution
* test chatbot updates
* test case update file
* added file
---------
Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com>
* fixed status blank issue
* Rendering the file name instead of link for gcs and s3 sources in the info modal
* added the default page size
* Convert is_cancelled value from string to bool
* Issue fixed Processed chunked as 0 when file re-process again
* Youtube timestamps (#386)
* Wikipedia source to accept all valid urls
* wikipedia url to support multiple languages
* integrated wiki langauge param for extract api
* Youtube video timestamps
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* groq llm integration backend (#286)
* groq llm integration backend
* groq and description in node properties
* added groq in options
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* Save Total Pages in DB
* Added total Pages
* file selection when we didn't select anything from Main table
* added the danger icon only for large files
* added the overflow for more files and file selection for all new files
* moved the interface to types
* added the icon accoroding to the source
* set total page for wiki and youtube
* h3 heading
* merge
* updated the alert on basis if total pages
* deleted chunks
* polling based on total pages
* isNan check
* large file based on file size for s3 and gcs
* file source in server side event
* time calculation based on chunks for gcs and s3
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com>
Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com>
Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com>
* fixed the layout issue
* Populate graph schema (#399)
* crreate new endpoint populate_graph_schema and update the query for getting lables from DB
* Added main.py changes
* conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396)
* added the condtion
* removed llms
* Fixed issue : Remove extra unused param
* get emb only if used (#278)
* Chatbot chunks (#402)
* Added file name to the content sent to LLM
* added chunk text in the response
* increased the docs parts sent to llm
* Modified graph query
* mardown rendering
* youtube starttime
* icons
* offset changes
* removed the files due to codespace space issue
---------
Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com>
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* Settings modal to support generating the labels from the llm by using text given by user (#405)
* added the json
* added schema from text dialog
* integrated the schemaAPI
* added the alert
* resize fixes
* fixed css issue
* fixed status blank issue
* Modified response when no docs is retrived (#413)
* Fixed env/docker-compose for local deployments + README doc (#410)
* Fixed env/docker-compose for local deployments + README doc
* wrong place for ENV in README
* by default, removed langsmith + fixed knn score string to float
* by default, removed langsmith + fixed knn score string to float
* Fixed strings in docker-compose env
* Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop)
* Missed the TIME_PER_PAGE env, was causing NaN issue in the approx time processing notification. fixed that
* Support for all unstructured files (#401)
* all unstructured files
* responsiveness
* added file type
* added the extensions
* spell mistake
* ppt file changes
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415)
* added the json
* added schema from text dialog
* integrated the schemaAPI
* added the alert
* resize fixes
* Extract schema using direct ChatOpenAI API and Chain
* integrated the checkbox for schema to text dialog
* Update SettingModal.tsx
---------
Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com>
* gcs file content read via storage client (#417)
* gcs file content read via storage client
* added the access token the file state
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* pypdf2 to read files from gcs (#420)
* 407 remove driver from frontend (#416)
* removed driver
* removed API
* connecting to database on page refresh
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* Css handling of info modal and Tooltips (#418)
* css change
* toolTips
* Sidebar Tooltips
* copy to clip
* css change
* added image types
* added gcs
* type fix
* docker changes
* speech
* added the toolip for dropzone sources
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* Fixed retrival bugs (#421)
* yarn format fixes
* changed the delete message
* added the cancel button
* changed the message on tooltip
* added space
* UI fixes
* tooltip for setting
* updated req
* wikipedia URL input (#424)
* accept only wikipedia links
* added wikipedia link
* added wikilink regex
* wikipedia single url only
* changed the alert message
* wording change
* pushed validation state persist error
---------
Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com>
* speech and copy (#422)
* speech and copy
* startTime
* added chunk properties
* tooltips
---------
Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com>
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* Fixed issue for out of range in KNN API
* solved conflicts
* conflict solved
* Remove logging info from update KNN API
* tooltip changes
* format and lint fixes
* responsiveness changes
* Fixed issue for total pages GCS, S3
* UI polishing (#428)
* button and tooltip changes
* checking validation on change
* settings module populate fix
* format fixes
* opening the modal after auth success
* removed the limit
* added the scrollbar for dropdowns
* speech state (#426)
* speech state
* Button Details changes
* delete wording change
* Total pages in buckets (#431)
* page number NA for buckets
* added N/A for gcs and s3 pages
* total pages for gcs
* remove unwanted logger
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* removed the max width
* Update FileTable.tsx
* Update the docker file
* Modified prompt (#438)
* Update Dockerfile
* Update Dockerfile
* Update Dockerfile
* rendering Fix
* Local file upload gcs (#442)
* Upload file to GCS
* Fixed GCS local upload issue; delete file from GCS after processing or when failed or cancelled
* Add lifecycle rule on uploaded bucket
* pdf upload local and gcs bucket check
* delete files when processed and extract changes
---------
Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com>
* Modified chat length and entities used (#443)
* metadata for unstructured files (#446)
* Unstructured file metadata (#447)
* metadata for unstructured files
* sleep in gcs upload
* updated
* icons added to chunks (#435)
* icons added to chunks
* info modal icons
* Dev (#433)
* Integration_qa test (#375)
* Test IntegrationQA added
* update test cases
* update test
* update node count assertions
* test changes
* update changes
* modification test
* Code refactor test cases
* Handle allowedlist issue in test
* test changes
* update test
* test case execution
* test chatbot updates
* test case update file
* added file
---------
Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com>
* recent merges
* pdf deletion due to running out of disk space
* fixed status blank issue
* Rendering the file name instead of link for gcs and s3 sources in the info modal
* Convert is_cancelled value from string to bool
* added the default page size
* Issue fixed: processed chunk count shown as 0 when file is re-processed again
* Youtube timestamps (#386)
* Wikipedia source to accept all valid urls
* wikipedia url to support multiple languages
* integrated wiki language param for extract api
* Youtube video timestamps
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* groq llm integration backend (#286)
* groq llm integration backend
* groq and description in node properties
* added groq in options
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* offset in chunks (#389)
* page number in gcs loader (#393)
* added youtube timestamps (#392)
* chat pop up button (#387)
* expand
* minimize-icon
* css changes
* chat history
* chatbot wider Side Nav
* expand icon
* chatbot UI
* Delete
* merge fixes
* code suggestions
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* chunks created before extraction using is_pre_process variable (#383)
* chunks created before extraction using is_pre_process variable
* Return total pages for Model
* update requirement.txt
* total pages on upload API
* added the Confirmation Dialog
* added the selected files into the confirmation modal
* format and lint fixes
* added the stop watch image
* file selection on alert dialog
* Add timeout in docker for gunicorn workers
* Add cancel icon to info popup (#384)
* Info Modal Changes
* css changes
* recent merges
* Integration_qa test (#375)
* Test IntegrationQA added
* update test cases
* update test
* update node count assertions
* test changes
* update changes
* modification test
* Code refactor test cases
* Handle allowedlist issue in test
* test changes
* update test
* test case execution
* test chatbot updates
* test case update file
* added file
---------
Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com>
* fixed status blank issue
* Rendering the file name instead of link for gcs and s3 sources in the info modal
* added the default page size
* Convert is_cancelled value from string to bool
* Issue fixed: processed chunk count shown as 0 when file is re-processed again
* Youtube timestamps (#386)
* Wikipedia source to accept all valid urls
* wikipedia url to support multiple languages
* integrated wiki language param for extract api
* Youtube video timestamps
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* groq llm integration backend (#286)
* groq llm integration backend
* groq and description in node properties
* added groq in options
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* Save Total Pages in DB
* Added total Pages
* file selection when we didn't select anything from Main table
* added the danger icon only for large files
* added the overflow for more files and file selection for all new files
* moved the interface to types
* added the icon according to the source
* set total page for wiki and youtube
* h3 heading
* merge
* updated the alert on the basis of total pages
* deleted chunks
* polling based on total pages
* isNaN check
* large file based on file size for s3 and gcs
* file source in server side event
* time calculation based on chunks for gcs and s3
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com>
Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com>
Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com>
* fixed the layout issue
* Populate graph schema (#399)
* create new endpoint populate_graph_schema and update the query for getting labels from DB
* Added main.py changes
* conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396)
* added the condition
* removed llms
* Fixed issue: Remove extra unused param
* get emb only if used (#278)
* Chatbot chunks (#402)
* Added file name to the content sent to LLM
* added chunk text in the response
* increased the docs parts sent to llm
* Modified graph query
* markdown rendering
* youtube starttime
* icons
* offset changes
* removed the files due to codespace space issue
---------
Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com>
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* Settings modal to support generating the labels from the llm by using text given by user (#405)
* added the json
* added schema from text dialog
* integrated the schemaAPI
* added the alert
* resize fixes
* fixed css issue
* fixed status blank issue
* Modified response when no docs are retrieved (#413)
* Fixed env/docker-compose for local deployments + README doc (#410)
* Fixed env/docker-compose for local deployments + README doc
* wrong place for ENV in README
* by default, removed langsmith + fixed knn score string to float
* by default, removed langsmith + fixed knn score string to float
* Fixed strings in docker-compose env
* Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop)
* Missed the TIME_PER_PAGE env, which was causing a NaN issue in the approximate processing time notification; fixed that
* Support for all unstructured files (#401)
* all unstructured files
* responsiveness
* added file type
* added the extensions
* spell mistake
* ppt file changes
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415)
* added the json
* added schema from text dialog
* integrated the schemaAPI
* added the alert
* resize fixes
* Extract schema using direct ChatOpenAI API and Chain
* integrated the checkbox for schema to text dialog
* Update SettingModal.tsx
---------
Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com>
* gcs file content read via storage client (#417)
* gcs file content read via storage client
* added the access token to the file state
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* pypdf2 to read files from gcs (#420)
* 407 remove driver from frontend (#416)
* removed driver
* removed API
* connecting to database on page refresh
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* Css handling of info modal and Tooltips (#418)
* css change
* toolTips
* Sidebar Tooltips
* copy to clip
* css change
* added image types
* added gcs
* type fix
* docker changes
* speech
* added the tooltip for dropzone sources
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* Fixed retrieval bugs (#421)
* yarn format fixes
* changed the delete message
* added the cancel button
* changed the message on tooltip
* added space
* UI fixes
* tooltip for setting
* updated req
* wikipedia URL input (#424)
* accept only wikipedia links
* added wikipedia link
* added wikilink regex
* wikipedia single url only
* changed the alert message
* wording change
* pushed validation state persist error
---------
Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com>
* speech and copy (#422)
* speech and copy
* startTime
* added chunk properties
* tooltips
---------
Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com>
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* Fixed issue for out of range in KNN API
* solved conflicts
* conflict solved
* Remove logging info from update KNN API
* tooltip changes
* format and lint fixes
* responsiveness changes
* Fixed issue for total pages GCS, S3
* UI polishing (#428)
* button and tooltip changes
* checking validation on change
* settings module populate fix
* format fixes
* opening the modal after auth success
* removed the limit
* added the scrollbar for dropdowns
* speech state (#426)
* speech state
* Button Details changes
* delete wording change
* Total pages in buckets (#431)
* page number NA for buckets
* added N/A for gcs and s3 pages
* total pages for gcs
* remove unwanted logger
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* removed the max width
* Update FileTable.tsx
* Update the docker file
* Modified prompt (#438)
* Update Dockerfile
* Update Dockerfile
* Update Dockerfile
* rendering Fix
* Local file upload gcs (#442)
* Upload file to GCS
* Fixed GCS local upload issue; delete file from GCS after processing or when failed or cancelled
* Add lifecycle rule on uploaded bucket
* pdf upload local and gcs bucket check
* delete files when processed and extract changes
---------
Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com>
* Modified chat length and entities used (#443)
* metadata for unstructured files (#446)
* Unstructured file metadata (#447)
* metadata for unstructured files
* sleep in gcs upload
* updated
* icons added to chunks (#435)
* icons added to chunks
* info modal icons
---------
Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com>
Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com>
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com>
Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com>
Co-authored-by: Ajay Meena
Co-authored-by: Morgan Senechal
Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com>
* fixed gcs status message issue
* added if check for failed count
* Fixed null issue from backend for upload API and graph_document when model name mismatches
* fixed word break issue
* Added neo4j-rust-ext
* processing time estimation based on bytes
* Fixed upper case file extension; file delete from GCS or local based on env variable.
* timer per byte
* Update Dockerfile
* Adding sort rows on the table (#451)
* Gcs upload folder hashed (#453)
* implement hashed folder name in GCS bucket upload
* Raise exception if invalid model selected
* folder name for gcs upload
---------
Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com>
* upload all unstructured files to gcs (#455)
* Modified chunk query (#454)
* Added libre office for fixing error -- soffice command was not found. Please install libreoffice
on your system and try again.
- Install instructions: https://www.libreoffice.org/get-help/install-howto/
- Mac: https://formulae.brew.sh/cask/libreoffice
- Debian: https://wiki.debian.org/LibreOffice
* Fix the PARTIAL CONTENT issue
* File-table no data found (#456)
* 'file-table'
* review comment
* Llm format change (#459)
* changed the llm models format to lowercase
* added the error message
* llm model changes
* format fixes
* removed unused import
* added the capitalize method
* delete files from merged_file_path only if source is local file
---------
Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com>
* commented total page code (#460)
* format fixes
* removed the disabled check on dropdown
* Large file env
* DEV to STAGING (#461)
* Integration_qa test (#375)
* Test IntegrationQA added
* update test cases
* update test
* update node count assertions
* test changes
* update changes
* modification test
* Code refactor test cases
* Handle allowedlist issue in test
* test changes
* update test
* test case execution
* test chatbot updates
* test case update file
* added file
---------
Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com>
* recent merges
* pdf deletion due to running out of disk space
* fixed status blank issue
* Rendering the file name instead of link for gcs and s3 sources in the info modal
* Convert is_cancelled value from string to bool
* added the default page size
* Issue fixed: processed chunk count shown as 0 when file is re-processed again
* Youtube timestamps (#386)
* Wikipedia source to accept all valid urls
* wikipedia url to support multiple languages
* integrated wiki language param for extract api
* Youtube video timestamps
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* groq llm integration backend (#286)
* groq llm integration backend
* groq and description in node properties
* added groq in options
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* offset in chunks (#389)
* page number in gcs loader (#393)
* added youtube timestamps (#392)
* chat pop up button (#387)
* expand
* minimize-icon
* css changes
* chat history
* chatbot wider Side Nav
* expand icon
* chatbot UI
* Delete
* merge fixes
* code suggestions
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* chunks created before extraction using is_pre_process variable (#383)
* chunks created before extraction using is_pre_process variable
* Return total pages for Model
* update requirement.txt
* total pages on upload API
* added the Confirmation Dialog
* added the selected files into the confirmation modal
* format and lint fixes
* added the stop watch image
* file selection on alert dialog
* Add timeout in docker for gunicorn workers
* Add cancel icon to info popup (#384)
* Info Modal Changes
* css changes
* recent merges
* Integration_qa test (#375)
* Test IntegrationQA added
* update test cases
* update test
* update node count assertions
* test changes
* update changes
* modification test
* Code refactor test cases
* Handle allowedlist issue in test
* test changes
* update test
* test case execution
* test chatbot updates
* test case update file
* added file
---------
Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com>
* fixed status blank issue
* Rendering the file name instead of link for gcs and s3 sources in the info modal
* added the default page size
* Convert is_cancelled value from string to bool
* Issue fixed: processed chunk count shown as 0 when file is re-processed again
* Youtube timestamps (#386)
* Wikipedia source to accept all valid urls
* wikipedia url to support multiple languages
* integrated wiki language param for extract api
* Youtube video timestamps
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* groq llm integration backend (#286)
* groq llm integration backend
* groq and description in node properties
* added groq in options
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* Save Total Pages in DB
* Added total Pages
* file selection when we didn't select anything from Main table
* added the danger icon only for large files
* added the overflow for more files and file selection for all new files
* moved the interface to types
* added the icon according to the source
* set total page for wiki and youtube
* h3 heading
* merge
* updated the alert on the basis of total pages
* deleted chunks
* polling based on total pages
* isNaN check
* large file based on file size for s3 and gcs
* file source in server side event
* time calculation based on chunks for gcs and s3
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com>
Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com>
Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com>
* fixed the layout issue
* Populate graph schema (#399)
* create new endpoint populate_graph_schema and update the query for getting labels from DB
* Added main.py changes
* conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396)
* added the condition
* removed llms
* Fixed issue: Remove extra unused param
* get emb only if used (#278)
* Chatbot chunks (#402)
* Added file name to the content sent to LLM
* added chunk text in the response
* increased the docs parts sent to llm
* Modified graph query
* markdown rendering
* youtube starttime
* icons
* offset changes
* removed the files due to codespace space issue
---------
Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com>
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* Settings modal to support generating the labels from the llm by using text given by user (#405)
* added the json
* added schema from text dialog
* integrated the schemaAPI
* added the alert
* resize fixes
* fixed css issue
* fixed status blank issue
* Modified response when no docs are retrieved (#413)
* Fixed env/docker-compose for local deployments + README doc (#410)
* Fixed env/docker-compose for local deployments + README doc
* wrong place for ENV in README
* by default, removed langsmith + fixed knn score string to float
* by default, removed langsmith + fixed knn score string to float
* Fixed strings in docker-compose env
* Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop)
* Missed the TIME_PER_PAGE env, which was causing a NaN issue in the approximate processing time notification; fixed that
* Support for all unstructured files (#401)
* all unstructured files
* responsiveness
* added file type
* added the extensions
* spell mistake
* ppt file changes
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415)
* added the json
* added schema from text dialog
* integrated the schemaAPI
* added the alert
* resize fixes
* Extract schema using direct ChatOpenAI API and Chain
* integrated the checkbox for schema to text dialog
* Update SettingModal.tsx
---------
Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com>
* gcs file content read via storage client (#417)
* gcs file content read via storage client
* added the access token to the file state
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* pypdf2 to read files from gcs (#420)
* 407 remove driver from frontend (#416)
* removed driver
* removed API
* connecting to database on page refresh
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* Css handling of info modal and Tooltips (#418)
* css change
* toolTips
* Sidebar Tooltips
* copy to clip
* css change
* added image types
* added gcs
* type fix
* docker changes
* speech
* added the tooltip for dropzone sources
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* Fixed retrieval bugs (#421)
* yarn format fixes
* changed the delete message
* added the cancel button
* changed the message on tooltip
* added space
* UI fixes
* tooltip for setting
* updated req
* wikipedia URL input (#424)
* accept only wikipedia links
* added wikipedia link
* added wikilink regex
* wikipedia single url only
* changed the alert message
* wording change
* pushed validation state persist error
---------
Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com>
* speech and copy (#422)
* speech and copy
* startTime
* added chunk properties
* tooltips
---------
Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com>
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* Fixed issue for out of range in KNN API
* solved conflicts
* conflict solved
* Remove logging info from update KNN API
* tooltip changes
* format and lint fixes
* responsiveness changes
* Fixed issue for total pages GCS, S3
* UI polishing (#428)
* button and tooltip changes
* checking validation on change
* settings module populate fix
* format fixes
* opening the modal after auth success
* removed the limit
* added the scrollbar for dropdowns
* speech state (#426)
* speech state
* Button Details changes
* delete wording change
* Total pages in buckets (#431)
* page number NA for buckets
* added N/A for gcs and s3 pages
* total pages for gcs
* remove unwanted logger
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* removed the max width
* Update FileTable.tsx
* Update the docker file
* Modified prompt (#438)
* Update Dockerfile
* Update Dockerfile
* Update Dockerfile
* rendering Fix
* Local file upload gcs (#442)
* Upload file to GCS
* Fixed GCS local upload issue; delete file from GCS after processing or when failed or cancelled
* Add lifecycle rule on uploaded bucket
* pdf upload local and gcs bucket check
* delete files when processed and extract changes
---------
Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com>
* Modified chat length and entities used (#443)
* metadata for unstructured files (#446)
* Unstructured file metadata (#447)
* metadata for unstructured files
* sleep in gcs upload
* updated
* icons added to chunks (#435)
* icons added to chunks
* info modal icons
* fixed gcs status message issue
* added if check for failed count
* Fixed null issue from backend for upload API and graph_document when model name mismatches
* fixed word break issue
* Added neo4j-rust-ext
* processing time estimation based on bytes
* Fixed upper case file extension; file delete from GCS or local based on env variable.
* timer per byte
* Update Dockerfile
* Adding sort rows on the table (#451)
* Gcs upload folder hashed (#453)
* implement hashed folder name in GCS bucket upload
* Raise exception if invalid model selected
* folder name for gcs upload
---------
Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com>
* upload all unstructured files to gcs (#455)
* Modified chunk query (#454)
* Added libre office for fixing error -- soffice command was not found. Please install libreoffice
on your system and try again.
- Install instructions: https://www.libreoffice.org/get-help/install-howto/
- Mac: https://formulae.brew.sh/cask/libreoffice
- Debian: https://wiki.debian.org/LibreOffice
* Fix the PARTIAL CONTENT issue
* File-table no data found (#456)
* 'file-table'
* review comment
* Llm format change (#459)
* changed the llm models format to lowercase
* added the error message
* llm model changes
* format fixes
* removed unused import
* added the capitalize method
* delete files from merged_file_path only if source is local file
---------
Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com>
* commented total page code (#460)
* format fixes
* removed the disabled check on dropdown
* Large file env
---------
Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com>
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com>
Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com>
Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com>
Co-authored-by: Ajay Meena
Co-authored-by: Morgan Senechal
Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com>
* DEV to STAGING (#462)
* Integration_qa test (#375)
* Test IntegrationQA added
* update test cases
* update test
* update node count assertions
* test changes
* update changes
* modification test
* Code refactor test cases
* Handle allowedlist issue in test
* test changes
* update test
* test case execution
* test chatbot updates
* test case update file
* added file
---------
Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com>
* recent merges
* pdf deletion due to running out of disk space
* fixed status blank issue
* Rendering the file name instead of link for gcs and s3 sources in the info modal
* Convert is_cancelled value from string to bool
* added the default page size
* Issue fixed: processed chunk count shown as 0 when file is re-processed again
* Youtube timestamps (#386)
* Wikipedia source to accept all valid urls
* wikipedia url to support multiple languages
* integrated wiki language param for extract api
* Youtube video timestamps
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* groq llm integration backend (#286)
* groq llm integration backend
* groq and description in node properties
* added groq in options
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* offset in chunks (#389)
* page number in gcs loader (#393)
* added youtube timestamps (#392)
* chat pop up button (#387)
* expand
* minimize-icon
* css changes
* chat history
* chatbot wider Side Nav
* expand icon
* chatbot UI
* Delete
* merge fixes
* code suggestions
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* chunks created before extraction using is_pre_process variable (#383)
* chunks created before extraction using is_pre_process variable
* Return total pages for Model
* update requirement.txt
* total pages on upload API
* added the Confirmation Dialog
* added the selected files into the confirmation modal
* format and lint fixes
* added the stop watch image
* file selection on alert dialog
* Add timeout in docker for gunicorn workers
* Add cancel icon to info popup (#384)
* Info Modal Changes
* css changes
* recent merges
* Integration_qa test (#375)
* Test IntegrationQA added
* update test cases
* update test
* update node count assertions
* test changes
* update changes
* modification test
* Code refactor test cases
* Handle allowedlist issue in test
* test changes
* update test
* test case execution
* test chatbot updates
* test case update file
* added file
---------
Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com>
* fixed status blank issue
* Rendering the file name instead of link for gcs and s3 sources in the info modal
* added the default page size
* Convert is_cancelled value from string to bool
* Issue fixed: processed chunk count shown as 0 when file is re-processed again
* Youtube timestamps (#386)
* Wikipedia source to accept all valid urls
* wikipedia url to support multiple languages
* integrated wiki language param for extract api
* Youtube video timestamps
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* groq llm integration backend (#286)
* groq llm integration backend
* groq and description in node properties
* added groq in options
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* Save Total Pages in DB
* Added total Pages
* file selection when we didn't select anything from Main table
* added the danger icon only for large files
* added the overflow for more files and file selection for all new files
* moved the interface to types
* added the icon according to the source
* set total page for wiki and youtube
* h3 heading
* merge
* updated the alert on the basis of total pages
* deleted chunks
* polling based on total pages
* isNaN check
* large file based on file size for s3 and gcs
* file source in server side event
* time calculation based on chunks for gcs and s3
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com>
Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com>
Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com>
* fixed the layout issue
* Populate graph schema (#399)
* create new endpoint populate_graph_schema and update the query for getting labels from DB
* Added main.py changes
* conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396)
* added the condition
* removed llms
* Fixed issue: Remove extra unused param
* get emb only if used (#278)
* Chatbot chunks (#402)
* Added file name to the content sent to LLM
* added chunk text in the response
* increased the docs parts sent to llm
* Modified graph query
* markdown rendering
* youtube starttime
* icons
* offset changes
* removed the files due to codespace space issue
---------
Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com>
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* Settings modal to support generating the labels from the llm by using text given by user (#405)
* added the json
* added schema from text dialog
* integrated the schemaAPI
* added the alert
* resize fixes
* fixed css issue
* fixed status blank issue
* Modified response when no docs are retrieved (#413)
* Fixed env/docker-compose for local deployments + README doc (#410)
* Fixed env/docker-compose for local deployments + README doc
* wrong place for ENV in README
* by default, removed langsmith + fixed knn score string to float
* by default, removed langsmith + fixed knn score string to float
* Fixed strings in docker-compose env
* Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop)
* Missed the TIME_PER_PAGE env, which was causing a NaN issue in the approximate processing time notification; fixed that
* Support for all unstructured files (#401)
* all unstructured files
* responsiveness
* added file type
* added the extensions
* spell mistake
* ppt file changes
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415)
* added the json
* added schema from text dialog
* integrated the schemaAPI
* added the alert
* resize fixes
* Extract schema using direct ChatOpenAI API and Chain
* integrated the checkbox for schema to text dialog
* Update SettingModal.tsx
---------
Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com>
* gcs file content read via storage client (#417)
* gcs file content read via storage client
* added the access token to the file state
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* pypdf2 to read files from gcs (#420)
* 407 remove driver from frontend (#416)
* removed driver
* removed API
* connecting to database on page refresh
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* Css handling of info modal and Tooltips (#418)
* css change
* toolTips
* Sidebar Tooltips
* copy to clip
* css change
* added image types
* added gcs
* type fix
* docker changes
* speech
* added the tooltip for dropzone sources
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* Fixed retrieval bugs (#421)
* yarn format fixes
* changed the delete message
* added the cancel button
* changed the message on tooltip
* added space
* UI fixes
* tooltip for setting
* updated req
* wikipedia URL input (#424)
* accept only wikipedia links
* added wikipedia link
* added wikilink regex
* wikipedia single url only
* changed the alert message
* wording change
* pushed validation state persist error
---------
Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com>
* speech and copy (#422)
* speech and copy
* startTime
* added chunk properties
* tooltips
---------
Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com>
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* Fixed issue for out of range in KNN API
* solved conflicts
* conflict solved
* Remove logging info from update KNN API
* tooltip changes
* format and lint fixes
* responsiveness changes
* Fixed issue for total pages GCS, S3
* UI polishing (#428)
* button and tooltip changes
* checking validation on change
* settings module populate fix
* format fixes
* opening the modal after auth success
* removed the limit
* added the scrollbar for dropdowns
* speech state (#426)
* speech state
* Button Details changes
* delete wording change
* Total pages in buckets (#431)
* page number NA for buckets
* added N/A for gcs and s3 pages
* total pages for gcs
* remove unwanted logger
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* removed the max width
* Update FileTable.tsx
* Update the docker file
* Modified prompt (#438)
* Update Dockerfile
* Update Dockerfile
* Update Dockerfile
* rendering Fix
* Local file upload gcs (#442)
* Upload file to GCS
* Fixed GCS local upload issue; delete file from GCS after processing or when failed or cancelled
* Add lifecycle rule on uploaded bucket
* pdf upload local and gcs bucket check
* delete files when processed and extract changes
---------
Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com>
* Modified chat length and entities used (#443)
* metadata for unstructured files (#446)
* Unstructured file metadata (#447)
* metadata for unstructured files
* sleep in gcs upload
* updated
* icons added to chunks (#435)
* icons added to chunks
* info modal icons
* fixed gcs status message issue
* added if check for failed count
* Fixed null issue from backend for upload API and graph_document when model name mismatches
* fixed word break issue
* Added neo4j-rust-ext
* processing time estimation based on bytes
* Fixed upper case file extension; file delete from GCS or local based on env variable.
* timer per byte
* Update Dockerfile
* Adding sort rows on the table (#451)
* Gcs upload folder hashed (#453)
* implement hashed folder name in GCS bucket upload
* Raise exception if invalid model selected
* folder name for gcs upload
---------
Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com>
* upload all unstructured files to gcs (#455)
* Modified chunk query (#454)
* Added libre office for fixing error -- soffice command was not found. Please install libreoffice
on your system and try again.
- Install instructions: https://www.libreoffice.org/get-help/install-howto/
- Mac: https://formulae.brew.sh/cask/libreoffice
- Debian: https://wiki.debian.org/LibreOffice
* Fix the PARTIAL CONTENT issue
* File-table no data found (#456)
* 'file-table'
* review comment
* Llm format change (#459)
* changed the llm models format to lowercase
* added the error message
* llm model changes
* format fixes
* removed unused import
* added the capitalize method
* delete files from merged_file_path only if source is local file
---------
Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com>
* commented total page code (#460)
* format fixes
* removed the disabled check on dropdown
* Large file env
---------
Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com>
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com>
Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com>
Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com>
Co-authored-by: Ajay Meena
Co-authored-by: Morgan Senechal
Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com>
* added upload api
* changed the dropzone error message
* Dev to staging (#466)
* Integration_qa test (#375)
* Test IntegrationQA added
* update test cases
* update test
* update node count assertions
* test changes
* update changes
* modification test
* Code refactor test cases
* Handle allowedlist issue in test
* test changes
* update test
* test case execution
* test chatbot updates
* test case update file
* added file
---------
Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com>
* recent merges
* pdf deletion due to running out of disk space
* fixed status blank issue
* Rendering the file name instead of link for gcs and s3 sources in the info modal
* Convert is_cancelled value from string to bool
* added the default page size
* Issue fixed: processed chunk count shown as 0 when file is re-processed again
* Youtube timestamps (#386)
* Wikipedia source to accept all valid urls
* wikipedia url to support multiple languages
* integrated wiki language param for extract api
* Youtube video timestamps
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* groq llm integration backend (#286)
* groq llm integration backend
* groq and description in node properties
* added groq in options
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* offset in chunks (#389)
* page number in gcs loader (#393)
* added youtube timestamps (#392)
* chat pop up button (#387)
* expand
* minimize-icon
* css changes
* chat history
* chatbot wider Side Nav
* expand icon
* chatbot UI
* Delete
* merge fixes
* code suggestions
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* chunks created before extraction using is_pre_process variable (#383)
* chunks created before extraction using is_pre_process variable
* Return total pages for Model
* update requirement.txt
* total pages on upload API
* added the Confirmation Dialog
* added the selected files into the confirmation modal
* format and lint fixes
* added the stop watch image
* file selection on alert dialog
* Add timeout in docker for gunicorn workers
* Add cancel icon to info popup (#384)
* Info Modal Changes
* css changes
* recent merges
* Integration_qa test (#375)
* Test IntegrationQA added
* update test cases
* update test
* update node count assertions
* test changes
* update changes
* modification test
* Code refactor test cases
* Handle allowedlist issue in test
* test changes
* update test
* test case execution
* test chatbot updates
* test case update file
* added file
---------
Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com>
* fixed status blank issue
* Rendering the file name instead of link for gcs and s3 sources in the info modal
* added the default page size
* Convert is_cancelled value from string to bool
* Issue fixed: processed chunk count shown as 0 when file is re-processed again
* Youtube timestamps (#386)
* Wikipedia source to accept all valid urls
* wikipedia url to support multiple languages
* integrated wiki language param for extract api
* Youtube video timestamps
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* groq llm integration backend (#286)
* groq llm integration backend
* groq and description in node properties
* added groq in options
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* Save Total Pages in DB
* Added total Pages
* file selection when we didn't select anything from Main table
* added the danger icon only for large files
* added the overflow for more files and file selection for all new files
* moved the interface to types
* added the icon according to the source
* set total page for wiki and youtube
* h3 heading
* merge
* updated the alert on the basis of total pages
* deleted chunks
* polling based on total pages
* isNaN check
* large file based on file size for s3 and gcs
* file source in server side event
* time calculation based on chunks for gcs and s3
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com>
Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com>
Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com>
* fixed the layout issue
* Populate graph schema (#399)
* create new endpoint populate_graph_schema and update the query for getting labels from DB
* Added main.py changes
* conditionally-including-the-gcs-login-flow-in-gcs-as-source (#396)
* added the condition
* removed llms
* Fixed issue: Remove extra unused param
* get emb only if used (#278)
* Chatbot chunks (#402)
* Added file name to the content sent to LLM
* added chunk text in the response
* increased the docs parts sent to llm
* Modified graph query
* markdown rendering
* youtube starttime
* icons
* offset changes
* removed the files due to codespace space issue
---------
Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com>
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* Settings modal to support generating the labels from the llm by using text given by user (#405)
* added the json
* added schema from text dialog
* integrated the schemaAPI
* added the alert
* resize fixes
* fixed css issue
* fixed status blank issue
* Modified response when no docs are retrieved (#413)
* Fixed env/docker-compose for local deployments + README doc (#410)
* Fixed env/docker-compose for local deployments + README doc
* wrong place for ENV in README
* by default, removed langsmith + fixed knn score string to float
* by default, removed langsmith + fixed knn score string to float
* Fixed strings in docker-compose env
* Added requirements (neo4j 5.15 or later, APOC, and instructions for Neo4j Desktop)
* Missed the TIME_PER_PAGE env, which was causing a NaN issue in the approximate processing time notification; fixed that
* Support for all unstructured files (#401)
* all unstructured files
* responsiveness
* added file type
* added the extensions
* spell mistake
* ppt file changes
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* Settings modal to support generating the labels from the llm by using text given by user with checkbox (#415)
* added the json
* added schema from text dialog
* integrated the schemaAPI
* added the alert
* resize fixes
* Extract schema using direct ChatOpenAI API and Chain
* integrated the checkbox for schema to text dialog
* Update SettingModal.tsx
---------
Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com>
* gcs file content read via storage client (#417)
* gcs file content read via storage client
* added the access token to the file state
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* pypdf2 to read files from gcs (#420)
* 407 remove driver from frontend (#416)
* removed driver
* removed API
* connecting to database on page refresh
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* Css handling of info modal and Tooltips (#418)
* css change
* toolTips
* Sidebar Tooltips
* copy to clip
* css change
* added image types
* added gcs
* type fix
* docker changes
* speech
* added the tooltip for dropzone sources
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* Fixed retrieval bugs (#421)
* yarn format fixes
* changed the delete message
* added the cancel button
* changed the message on tooltip
* added space
* UI fixes
* tooltip for setting
* updated req
* wikipedia URL input (#424)
* accept only wikipedia links
* added wikipedia link
* added wikilink regex
* wikipedia single url only
* changed the alert message
* wording change
* pushed validation state persist error
---------
Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com>
* speech and copy (#422)
* speech and copy
* startTime
* added chunk properties
* tooltips
---------
Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com>
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* Fixed issue for out of range in KNN API
* solved conflicts
* conflict solved
* Remove logging info from update KNN API
* tooltip changes
* format and lint fixes
* responsiveness changes
* Fixed issue for total pages GCS, S3
* UI polishing (#428)
* button and tooltip changes
* checking validation on change
* settings module populate fix
* format fixes
* opening the modal after auth success
* removed the limit
* added the scrollbar for dropdowns
* speech state (#426)
* speech state
* Button Details changes
* delete wording change
* Total pages in buckets (#431)
* page number NA for buckets
* added N/A for gcs and s3 pages
* total pages for gcs
* remove unwanted logger
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
* removed the max width
* Update FileTable.tsx
* Update the docker file
* Modified prompt (#438)
* Update Dockerfile
* Update Dockerfile
* Update Dockerfile
* rendering Fix
* Local file upload gcs (#442)
* Upload file to GCS
* Fixed GCS local upload issue; delete file from GCS after processing or when failed or cancelled
* Add lifecycle rule on uploaded bucket
* pdf upload local and gcs bucket check
* delete files when processed and extract changes
---------
Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com>
* Modified chat length and entities used (#443)
* metadata for unstructured files (#446)
* Unstructured file metadata (#447)
* metadata for unstructured files
* sleep in gcs upload
* updated
* icons added to chunks (#435)
* icons added to chunks
* info modal icons
* fixed gcs status message issue
* added if check for failed count
* Fixed null issue from backend for upload API and graph_document when model name mismatches
* fixed word break issue
* Added neo4j-rust-ext
* processing time estimation based on bytes
* Fixed upper case file extension; file delete from GCS or local based on env variable.
* timer per byte
* Update Dockerfile
* Adding sort rows on the table (#451)
* Gcs upload folder hashed (#453)
* implement hashed folder name in GCS bucket upload
* Raise exception if invalid model selected
* folder name for gcs upload
---------
Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com>
* upload all unstructured files to gcs (#455)
* Modified chunk query (#454)
* Added libre office for fixing error -- soffice command was not found. Please install libreoffice
on your system and try again.
- Install instructions: https://www.libreoffice.org/get-help/install-howto/
- Mac: https://formulae.brew.sh/cask/libreoffice
- Debian: https://wiki.debian.org/LibreOffice
* Fix the PARTIAL CONTENT issue
* File-table no data found (#456)
* 'file-table'
* review comment
* Llm format change (#459)
* changed the llm models format to lowercase
* added the error message
* llm model changes
* format fixes
* removed unused import
* added the capitalize method
* delete files from merged_file_path only if source is local file
---------
Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com>
* commented total page code (#460)
* format fixes
* removed the disabled check on dropdown
* Large file env
* added upload api
* changed the dropzone error message
---------
Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com>
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com>
Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com>
Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com>
Co-authored-by: Ajay Meena
Co-authored-by: Morgan Senechal
Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com>
* format fixes
* Close connection when graph object is not None
* Call garbage collector to release the memory
* Change error message
* Added driver config as user_agent
* Updated doc for the LLM_MODELS and GCS_FILE_CACHE (#473)
* Web URLs are user input (#475); a minimal sketch of the idea follows this entry
* web url support backend
* added the tabs for input source
* user agent added for Neo4jGraph connection
* Tab view for sources
* extract handling for web urls
* initial input handling
* chunk creation before processing
* code structure
* format fixes
---------
Co-authored-by: kartikpersistent <101251502+kartikpersistent@users.noreply.github.com>
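As a rough illustration of the web-URL source above, a minimal sketch assuming a LangChain-style loader; the loader choice, URL, and wiring are assumptions for illustration, not the project's actual backend code:

from langchain_community.document_loaders import WebBaseLoader

def load_web_page(url: str):
    # Fetch the page and return LangChain Document objects, which can then
    # be chunked before extraction (assumes beautifulsoup4 is installed).
    return WebBaseLoader(url).load()

docs = load_web_page("https://example.com/article")
print(docs[0].metadata, len(docs[0].page_content))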
* changed the regex for web and cancel button naming
* changed the schema dropdown type
* readme updates
* PROD version fix
* changed the alert message for gcs
* Delete unconnected entities from DB (#482)
* 457 add schema before generate graph (#478)
* schema setting from generate graph
* changes
* changes
* badge changes
* bug fix
* Fulltext index and Update similarity graph (#479); a minimal sketch of the index creation follows this entry
* added full_text index
* added one common function for post_processing
* post processing api
* added tasks param
* modified logging
* post processing changes
---------
Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com>
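A minimal sketch of the fulltext index creation described above, assuming Neo4j 5 syntax; the index name, node label, and property are illustrative assumptions, not the project's actual post-processing code:

from neo4j import GraphDatabase

# Hypothetical connection details, for illustration only.
driver = GraphDatabase.driver("neo4j://localhost:7687", auth=("neo4j", "password"))

def create_fulltext_index(index_name: str = "keyword") -> None:
    # IF NOT EXISTS makes the call idempotent, so a common post-processing
    # function can run it safely after every extraction.
    query = (
        f"CREATE FULLTEXT INDEX {index_name} IF NOT EXISTS "
        "FOR (n:Chunk) ON EACH [n.text]"
    )
    with driver.session() as session:
        session.run(query)

create_fulltext_index()
driver.close()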
* Graph and vector search (#485); a minimal sketch of the combined retrieval follows this entry
* Modified the retrieval query
* added the chatmode toggle component
* Modified to vector search
* Moved the templates to constants
* added the icons
* added chat modes
* code structure changes
* Integrated the API changes
* Modified retrieval queries, refactored code
* API integration changes
* added the score
* order change
* wording change
* modified constants
* added graph+vector
* added the tooltips
* Modified query
* removed the graph mode
* tooltip camelCase
* added the icon and external link for web source in the info modal
* added the youtube link in the source used tab
* format fixes
* added the hoverable link
---------
Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com>
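A minimal sketch of the graph+vector mode described above: vector similarity search followed by a graph expansion step. The index name 'vector', the PART_OF and HAS_ENTITY relationships, and the returned properties are assumptions for illustration, not the actual retrieval query:

from neo4j import GraphDatabase

driver = GraphDatabase.driver("neo4j://localhost:7687", auth=("neo4j", "password"))

def graph_vector_search(embedding: list, k: int = 5):
    # db.index.vector.queryNodes (Neo4j 5.11+) returns the k nearest chunks;
    # the MATCH clauses then expand into the graph for sources and entities.
    query = """
    CALL db.index.vector.queryNodes('vector', $k, $embedding)
    YIELD node AS chunk, score
    MATCH (chunk)-[:PART_OF]->(d:Document)
    OPTIONAL MATCH (chunk)-[:HAS_ENTITY]->(e)
    RETURN chunk.text AS text, score, d.fileName AS source,
           collect(DISTINCT e.id) AS entities
    ORDER BY score DESC
    """
    with driver.session() as session:
        return session.run(query, k=k, embedding=embedding).data()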
* Update InfoModal.tsx
…
* Issue fixed: list index out of range while getting status of document node
* default modes in staging
* processing count updated on cancel
* processing count update fix on cancel
* format fixes
* remove whitespace from environment variable value, which was causing the error "xxx may not contain whitespace" (#707)
* updated disconnected nodes
* updated disconnected nodes
* fix: Processed count update on failed condition
* added disconnected and up nodes
* resetting the alert message on success scenario
* populate graph schema
* not clearing the password when there is an error scenario
* fixed the vector index loading issue
* fix: empty credentials payload for recreate vector index api
* invoking the post processing after all processing is complete
---------
Co-authored-by: Prakriti Solankey <156313631+prakriti-solankey@users.noreply.github.com>
Co-authored-by: vasanthasaikalluri <165021735+vasanthasaikalluri@users.noreply.github.com>
Co-authored-by: destiny966113 <90891243+destiny966113@users.noreply.github.com>
Co-authored-by: abhishekkumar-27 <164544129+abhishekkumar-27@users.noreply.github.com>
Co-authored-by: Pravesh Kumar <121786590+praveshkumar1988@users.noreply.github.com>
Co-authored-by: aashipandya <156318202+aashipandya@users.noreply.github.com>
Co-authored-by: Ajay Meena
Co-authored-by: Morgan Senechal
Co-authored-by: karanchellani <142801957+karanchellani@users.noreply.github.com>
Co-authored-by: Ikko Eltociear Ashimine
Co-authored-by: Pravesh1988
Co-authored-by: edenbuaa
---
README.md | 2 +-
backend/Performance_test.py | 1 +
backend/requirements.txt | 24 +-
backend/score.py | 51 +-
backend/src/QA_integration_new.py | 28 +-
backend/src/main.py | 161 ++++---
backend/src/shared/constants.py | 2 +-
backend/test_integrationqa.py | 439 ++++++++----------
example.env | 37 +-
frontend/Dockerfile | 3 +-
frontend/example.env | 1 -
frontend/src/API/Index.ts | 7 +
frontend/src/App.css | 11 +-
.../src/components/ChatBot/ChatInfoModal.tsx | 19 +-
.../src/components/ChatBot/ChatModeToggle.tsx | 7 +-
.../src/components/ChatBot/Info/InfoModal.tsx | 13 +-
frontend/src/components/Content.tsx | 74 +--
frontend/src/components/Dropdown.tsx | 11 +-
frontend/src/components/FileTable.tsx | 7 +
.../src/components/Graph/GraphViewModal.tsx | 290 ++++++++++--
frontend/src/components/Graph/LegendsChip.tsx | 10 +-
.../ConnectionModal/ConnectionModal.tsx | 116 ++---
.../VectorIndexMisMatchAlert.tsx | 11 +-
.../Deduplication/index.tsx | 2 +-
.../DeleteTabForOrphanNodes/index.tsx | 2 +-
frontend/src/components/QuickStarter.tsx | 2 +-
frontend/src/components/UI/Legend.tsx | 14 +-
frontend/src/components/UI/ShowAll.tsx | 38 ++
frontend/src/context/UsersFiles.tsx | 2 +-
frontend/src/hooks/useSse.tsx | 5 +-
frontend/src/services/CancelAPI.ts | 5 +-
frontend/src/services/ChunkEntitiesInfo.ts | 5 +-
frontend/src/services/CommonAPI.ts | 5 +-
frontend/src/services/ConnectAPI.ts | 5 +-
frontend/src/services/DeleteFiles.ts | 5 +-
frontend/src/services/DeleteOrphanNodes.ts | 5 +-
frontend/src/services/GetDuplicateNodes.ts | 5 +-
frontend/src/services/GetFiles.ts | 9 +-
.../src/services/GetNodeLabelsRelTypes.ts | 5 +-
frontend/src/services/GetOrphanNodes.ts | 5 +-
frontend/src/services/GraphQuery.ts | 5 +-
frontend/src/services/HealthStatus.ts | 7 +-
.../src/services/MergeDuplicateEntities.ts | 5 +-
frontend/src/services/PollingAPI.ts | 9 +-
frontend/src/services/PostProcessing.ts | 5 +-
frontend/src/services/QnaAPI.ts | 7 +-
frontend/src/services/SchemaFromTextAPI.ts | 5 +-
frontend/src/services/URLScan.ts | 5 +-
frontend/src/services/vectorIndexCreation.ts | 5 +-
frontend/src/types.ts | 39 +-
frontend/src/utils/Constants.ts | 10 +-
frontend/src/utils/Utils.ts | 10 +-
frontend/yarn.lock | 2 +-
53 files changed, 920 insertions(+), 638 deletions(-)
create mode 100644 frontend/src/API/Index.ts
create mode 100644 frontend/src/components/UI/ShowAll.tsx
diff --git a/README.md b/README.md
index 800721ec3..e0b59346d 100644
--- a/README.md
+++ b/README.md
@@ -149,7 +149,7 @@ Allow unauthenticated request : Yes
| VITE_LLM_MODELS | Mandatory | diffbot,openai-gpt-3.5,openai-gpt-4o | Models available for selection on the frontend, used for entities extraction and Q&A
| VITE_CHAT_MODES | Mandatory | vector,graph+vector,graph,hybrid | Chat modes available for Q&A
| VITE_ENV | Mandatory | DEV or PROD | Environment variable for the app |
-| VITE_TIME_PER_CHUNK | Optional | 4 | Time per chunk for processing |
+| VITE_TIME_PER_PAGE | Optional | 50 | Time per page for processing |
| VITE_CHUNK_SIZE | Optional | 5242880 | Size of each chunk of file for upload |
| VITE_GOOGLE_CLIENT_ID | Optional | | Client ID for Google authentication |
| GCS_FILE_CACHE | Optional | False | If set to True, will save the files to process into GCS. If set to False, will save the files locally |
diff --git a/backend/Performance_test.py b/backend/Performance_test.py
index fc0aee66f..712d3daf1 100644
--- a/backend/Performance_test.py
+++ b/backend/Performance_test.py
@@ -94,6 +94,7 @@ def performance_main():
for _ in range(CONCURRENT_REQUESTS):
futures.append(executor.submit(post_request_chunk))
+ # Chatbot request futures
# Chatbot request futures
# for message in CHATBOT_MESSAGES:
# futures.append(executor.submit(chatbot_request, message))
diff --git a/backend/requirements.txt b/backend/requirements.txt
index ab42a749d..46c57aea5 100644
--- a/backend/requirements.txt
+++ b/backend/requirements.txt
@@ -69,18 +69,18 @@ jsonpath-python==1.0.6
jsonpointer==2.4
json-repair==0.25.2
kiwisolver==1.4.5
-langchain==0.2.8
-langchain-aws==0.1.9
-langchain-anthropic==0.1.19
-langchain-fireworks==0.1.4
-langchain-google-genai==1.0.7
-langchain-community==0.2.7
-langchain-core==0.2.19
-langchain-experimental==0.0.62
-langchain-google-vertexai==1.0.6
-langchain-groq==0.1.6
-langchain-openai==0.1.14
-langchain-text-splitters==0.2.2
+langchain
+langchain-aws
+langchain-anthropic
+langchain-fireworks
+langchain-google-genai
+langchain-community
+langchain-core
+langchain-experimental
+langchain-google-vertexai
+langchain-groq
+langchain-openai
+langchain-text-splitters
langdetect==1.0.9
langsmith==0.1.83
layoutparser==0.3.4
diff --git a/backend/score.py b/backend/score.py
index 7b06def44..19f7ddb1a 100644
--- a/backend/score.py
+++ b/backend/score.py
@@ -106,8 +106,8 @@ async def create_source_knowledge_graph_url(
return create_api_response('Failed',message='source_type is other than accepted source')
message = f"Source Node created successfully for source type: {source_type} and source: {source}"
- josn_obj = {'api_name':'url_scan','db_url':uri,'url_scanned_file':lst_file_name, 'source_url':source_url, 'wiki_query':wiki_query, 'logging_time': formatted_time(datetime.now(timezone.utc))}
- logger.log_struct(josn_obj)
+ json_obj = {'api_name':'url_scan','db_url':uri,'url_scanned_file':lst_file_name, 'source_url':source_url, 'wiki_query':wiki_query, 'logging_time': formatted_time(datetime.now(timezone.utc))}
+ logger.log_struct(json_obj)
return create_api_response("Success",message=message,success_count=success_count,failed_count=failed_count,file_name=lst_file_name)
except Exception as e:
error_message = str(e)
@@ -209,9 +209,9 @@ async def extract_knowledge_graph_from_file(
else:
logging.info(f'Deleted File Path: {merged_file_path} and Deleted File Name : {file_name}')
delete_uploaded_local_file(merged_file_path,file_name)
- josn_obj = {'message':message,'error_message':error_message, 'file_name': file_name,'status':'Failed','db_url':uri,'failed_count':1, 'source_type': source_type, 'source_url':source_url, 'wiki_query':wiki_query, 'logging_time': formatted_time(datetime.now(timezone.utc))}
- logger.log_struct(josn_obj)
- logging.exception(f'File Failed in extraction: {josn_obj}')
+ json_obj = {'message':message,'error_message':error_message, 'file_name': file_name,'status':'Failed','db_url':uri,'failed_count':1, 'source_type': source_type, 'source_url':source_url, 'wiki_query':wiki_query, 'logging_time': formatted_time(datetime.now(timezone.utc))}
+ logger.log_struct(json_obj)
+ logging.exception(f'File Failed in extraction: {json_obj}')
return create_api_response('Failed', message=message + error_message[:100], error=error_message, file_name = file_name)
finally:
gc.collect()
@@ -226,8 +226,8 @@ async def get_source_list(uri:str, userName:str, password:str, database:str=None
if " " in uri:
uri = uri.replace(" ","+")
result = await asyncio.to_thread(get_source_list_from_graph,uri,userName,decoded_password,database)
- josn_obj = {'api_name':'sources_list','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc))}
- logger.log_struct(josn_obj)
+ json_obj = {'api_name':'sources_list','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc))}
+ logger.log_struct(json_obj)
return create_api_response("Success",data=result)
except Exception as e:
job_status = "Failed"
@@ -251,19 +251,20 @@ async def post_processing(uri=Form(), userName=Form(), password=Form(), database
if "materialize_text_chunk_similarities" in tasks:
await asyncio.to_thread(update_graph, graph)
- josn_obj = {'api_name': 'post_processing/materialize_text_chunk_similarities', 'db_url': uri, 'logging_time': formatted_time(datetime.now(timezone.utc))}
- logger.log_struct(josn_obj)
+ json_obj = {'api_name': 'post_processing/update_similarity_graph', 'db_url': uri, 'logging_time': formatted_time(datetime.now(timezone.utc))}
+ logger.log_struct(json_obj)
logging.info(f'Updated KNN Graph')
+
if "enable_hybrid_search_and_fulltext_search_in_bloom" in tasks:
await asyncio.to_thread(create_fulltext, uri=uri, username=userName, password=password, database=database,type="entities")
- await asyncio.to_thread(create_fulltext, uri=uri, username=userName, password=password, database=database,type="keyword")
+ # await asyncio.to_thread(create_fulltext, uri=uri, username=userName, password=password, database=database,type="keyword")
josn_obj = {'api_name': 'post_processing/enable_hybrid_search_and_fulltext_search_in_bloom', 'db_url': uri, 'logging_time': formatted_time(datetime.now(timezone.utc))}
logger.log_struct(josn_obj)
logging.info(f'Full Text index created')
if os.environ.get('ENTITY_EMBEDDING','False').upper()=="TRUE" and "materialize_entity_similarities" in tasks:
await asyncio.to_thread(create_entity_embedding, graph)
- josn_obj = {'api_name': 'post_processing/materialize_entity_similarities', 'db_url': uri, 'logging_time': formatted_time(datetime.now(timezone.utc))}
- logger.log_struct(josn_obj)
+ json_obj = {'api_name': 'post_processing/create_entity_embedding', 'db_url': uri, 'logging_time': formatted_time(datetime.now(timezone.utc))}
+ logger.log_struct(json_obj)
logging.info(f'Entity Embeddings created')
return create_api_response('Success', message='All tasks completed successfully')
@@ -292,8 +293,8 @@ async def chat_bot(uri=Form(),model=Form(None),userName=Form(), password=Form(),
logging.info(f"Total Response time is {total_call_time:.2f} seconds")
result["info"]["response_time"] = round(total_call_time, 2)
- josn_obj = {'api_name':'chat_bot','db_url':uri,'session_id':session_id, 'logging_time': formatted_time(datetime.now(timezone.utc))}
- logger.log_struct(josn_obj)
+ json_obj = {'api_name':'chat_bot','db_url':uri,'session_id':session_id, 'logging_time': formatted_time(datetime.now(timezone.utc))}
+ logger.log_struct(json_obj)
return create_api_response('Success',data=result)
except Exception as e:
job_status = "Failed"
@@ -309,8 +310,8 @@ async def chunk_entities(uri=Form(),userName=Form(), password=Form(), chunk_ids=
try:
logging.info(f"URI: {uri}, Username: {userName}, chunk_ids: {chunk_ids}")
result = await asyncio.to_thread(get_entities_from_chunkids,uri=uri, username=userName, password=password, chunk_ids=chunk_ids)
- josn_obj = {'api_name':'chunk_entities','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc))}
- logger.log_struct(josn_obj)
+ json_obj = {'api_name':'chunk_entities','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc))}
+ logger.log_struct(json_obj)
return create_api_response('Success',data=result)
except Exception as e:
job_status = "Failed"
@@ -337,8 +338,8 @@ async def graph_query(
password=password,
document_names=document_names
)
- josn_obj = {'api_name':'graph_query','db_url':uri,'document_names':document_names, 'logging_time': formatted_time(datetime.now(timezone.utc))}
- logger.log_struct(josn_obj)
+ json_obj = {'api_name':'graph_query','db_url':uri,'document_names':document_names, 'logging_time': formatted_time(datetime.now(timezone.utc))}
+ logger.log_struct(json_obj)
return create_api_response('Success', data=result)
except Exception as e:
job_status = "Failed"
@@ -387,8 +388,8 @@ async def upload_large_file_into_chunks(file:UploadFile = File(...), chunkNumber
try:
graph = create_graph_database_connection(uri, userName, password, database)
result = await asyncio.to_thread(upload_file, graph, model, file, chunkNumber, totalChunks, originalname, uri, CHUNK_DIR, MERGED_DIR)
- josn_obj = {'api_name':'upload','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc))}
- logger.log_struct(josn_obj)
+ json_obj = {'api_name':'upload','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc))}
+ logger.log_struct(json_obj)
if int(chunkNumber) == int(totalChunks):
return create_api_response('Success',data=result, message='Source Node Created Successfully')
else:
@@ -409,8 +410,8 @@ async def get_structured_schema(uri=Form(), userName=Form(), password=Form(), da
graph = create_graph_database_connection(uri, userName, password, database)
result = await asyncio.to_thread(get_labels_and_relationtypes, graph)
logging.info(f'Schema result from DB: {result}')
- josn_obj = {'api_name':'schema','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc))}
- logger.log_struct(josn_obj)
+ json_obj = {'api_name':'schema','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc))}
+ logger.log_struct(json_obj)
return create_api_response('Success', data=result)
except Exception as e:
message="Unable to get the labels and relationtypes from neo4j database"
@@ -478,8 +479,8 @@ async def delete_document_and_entities(uri=Form(),
result, files_list_size = await asyncio.to_thread(graphDb_data_Access.delete_file_from_graph, filenames, source_types, deleteEntities, MERGED_DIR, uri)
# entities_count = result[0]['deletedEntities'] if 'deletedEntities' in result[0] else 0
message = f"Deleted {files_list_size} documents with entities from database"
- josn_obj = {'api_name':'delete_document_and_entities','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc))}
- logger.log_struct(josn_obj)
+ json_obj = {'api_name':'delete_document_and_entities','db_url':uri, 'logging_time': formatted_time(datetime.now(timezone.utc))}
+ logger.log_struct(json_obj)
return create_api_response('Success',message=message)
except Exception as e:
job_status = "Failed"
@@ -635,4 +636,4 @@ async def merge_duplicate_nodes(uri=Form(), userName=Form(), password=Form(), da
gc.collect()
if __name__ == "__main__":
- uvicorn.run(app)
\ No newline at end of file
+ uvicorn.run(app)
diff --git a/backend/src/QA_integration_new.py b/backend/src/QA_integration_new.py
index 1c0bc254c..eeac78c1e 100644
--- a/backend/src/QA_integration_new.py
+++ b/backend/src/QA_integration_new.py
@@ -41,26 +41,26 @@
def get_neo4j_retriever(graph, retrieval_query,document_names,mode,index_name="vector",keyword_index="keyword", search_k=CHAT_SEARCH_KWARG_K, score_threshold=CHAT_SEARCH_KWARG_SCORE_THRESHOLD):
try:
- if mode == "hybrid":
- # neo_db = Neo4jVector.from_existing_graph(
- # embedding=EMBEDDING_FUNCTION,
- # index_name=index_name,
- # retrieval_query=retrieval_query,
- # graph=graph,
- # search_type="hybrid",
- # node_label="Chunk",
- # embedding_node_property="embedding",
- # text_node_properties=["text"]
- # # keyword_index_name=keyword_index
- # )
- neo_db = Neo4jVector.from_existing_index(
+ if mode == "fulltext" or mode == "graph + vector + fulltext":
+ neo_db = Neo4jVector.from_existing_graph(
embedding=EMBEDDING_FUNCTION,
index_name=index_name,
retrieval_query=retrieval_query,
graph=graph,
search_type="hybrid",
+ node_label="Chunk",
+ embedding_node_property="embedding",
+ text_node_properties=["text"],
keyword_index_name=keyword_index
)
+ # neo_db = Neo4jVector.from_existing_index(
+ # embedding=EMBEDDING_FUNCTION,
+ # index_name=index_name,
+ # retrieval_query=retrieval_query,
+ # graph=graph,
+ # search_type="hybrid",
+ # keyword_index_name=keyword_index
+ # )
logging.info(f"Successfully retrieved Neo4jVector index '{index_name}' and keyword index '{keyword_index}'")
else:
neo_db = Neo4jVector.from_existing_index(
@@ -374,7 +374,7 @@ def QA_RAG(graph, model, question, document_names,session_id, mode):
"user": "chatbot"
}
return result
- elif mode == "vector" or mode == "hybrid":
+ elif mode == "vector" or mode == "fulltext":
retrieval_query = VECTOR_SEARCH_QUERY
else:
retrieval_query = VECTOR_GRAPH_SEARCH_QUERY.format(no_of_entites=VECTOR_GRAPH_SEARCH_ENTITY_LIMIT)
diff --git a/backend/src/main.py b/backend/src/main.py
index f7dd190ef..a7d5058a0 100644
--- a/backend/src/main.py
+++ b/backend/src/main.py
@@ -264,6 +264,7 @@ def processing_source(uri, userName, password, database, model, file_name, pages
graphDb_data_Access = graphDBdataAccess(graph)
result = graphDb_data_Access.get_current_status_document_node(file_name)
+ print(result)
logging.info("Break down file into chunks")
bad_chars = ['"', "\n", "'"]
for i in range(0,len(pages)):
@@ -277,91 +278,97 @@ def processing_source(uri, userName, password, database, model, file_name, pages
create_chunks_obj = CreateChunksofDocument(pages, graph)
chunks = create_chunks_obj.split_file_into_chunks()
chunkId_chunkDoc_list = create_relation_between_chunks(graph,file_name,chunks)
- if result[0]['Status'] != 'Processing':
- obj_source_node = sourceNode()
- status = "Processing"
- obj_source_node.file_name = file_name
- obj_source_node.status = status
- obj_source_node.total_chunks = len(chunks)
- obj_source_node.total_pages = len(pages)
- obj_source_node.model = model
- logging.info(file_name)
- logging.info(obj_source_node)
- graphDb_data_Access.update_source_node(obj_source_node)
-
- logging.info('Update the status as Processing')
- update_graph_chunk_processed = int(os.environ.get('UPDATE_GRAPH_CHUNKS_PROCESSED'))
- # selected_chunks = []
- is_cancelled_status = False
- job_status = "Completed"
- node_count = 0
- rel_count = 0
- for i in range(0, len(chunkId_chunkDoc_list), update_graph_chunk_processed):
- select_chunks_upto = i+update_graph_chunk_processed
- logging.info(f'Selected Chunks upto: {select_chunks_upto}')
- if len(chunkId_chunkDoc_list) <= select_chunks_upto:
- select_chunks_upto = len(chunkId_chunkDoc_list)
- selected_chunks = chunkId_chunkDoc_list[i:select_chunks_upto]
+
+ if len(result) > 0:
+ if result[0]['Status'] != 'Processing':
+ obj_source_node = sourceNode()
+ status = "Processing"
+ obj_source_node.file_name = file_name
+ obj_source_node.status = status
+ obj_source_node.total_chunks = len(chunks)
+ obj_source_node.total_pages = len(pages)
+ obj_source_node.model = model
+ logging.info(file_name)
+ logging.info(obj_source_node)
+ graphDb_data_Access.update_source_node(obj_source_node)
+
+ logging.info('Update the status as Processing')
+ update_graph_chunk_processed = int(os.environ.get('UPDATE_GRAPH_CHUNKS_PROCESSED'))
+ # selected_chunks = []
+ is_cancelled_status = False
+ job_status = "Completed"
+ node_count = 0
+ rel_count = 0
+ for i in range(0, len(chunkId_chunkDoc_list), update_graph_chunk_processed):
+ select_chunks_upto = i+update_graph_chunk_processed
+ logging.info(f'Selected Chunks upto: {select_chunks_upto}')
+ if len(chunkId_chunkDoc_list) <= select_chunks_upto:
+ select_chunks_upto = len(chunkId_chunkDoc_list)
+ selected_chunks = chunkId_chunkDoc_list[i:select_chunks_upto]
+ result = graphDb_data_Access.get_current_status_document_node(file_name)
+ is_cancelled_status = result[0]['is_cancelled']
+ logging.info(f"Value of is_cancelled : {result[0]['is_cancelled']}")
+ if bool(is_cancelled_status) == True:
+ job_status = "Cancelled"
+ logging.info('Exit from running loop of processing file')
+ exit
+ else:
+ node_count,rel_count = processing_chunks(selected_chunks,graph,uri, userName, password, database,file_name,model,allowedNodes,allowedRelationship,node_count, rel_count)
+ end_time = datetime.now()
+ processed_time = end_time - start_time
+
+ obj_source_node = sourceNode()
+ obj_source_node.file_name = file_name
+ obj_source_node.updated_at = end_time
+ obj_source_node.processing_time = processed_time
+ obj_source_node.node_count = node_count
+ obj_source_node.processed_chunk = select_chunks_upto
+ obj_source_node.relationship_count = rel_count
+ graphDb_data_Access.update_source_node(obj_source_node)
+
result = graphDb_data_Access.get_current_status_document_node(file_name)
is_cancelled_status = result[0]['is_cancelled']
- logging.info(f"Value of is_cancelled : {result[0]['is_cancelled']}")
if bool(is_cancelled_status) == True:
- job_status = "Cancelled"
- logging.info('Exit from running loop of processing file')
- exit
- else:
- node_count,rel_count = processing_chunks(selected_chunks,graph,uri, userName, password, database,file_name,model,allowedNodes,allowedRelationship,node_count, rel_count)
- end_time = datetime.now()
- processed_time = end_time - start_time
-
- obj_source_node = sourceNode()
- obj_source_node.file_name = file_name
- obj_source_node.updated_at = end_time
- obj_source_node.processing_time = processed_time
- obj_source_node.node_count = node_count
- obj_source_node.processed_chunk = select_chunks_upto
- obj_source_node.relationship_count = rel_count
- graphDb_data_Access.update_source_node(obj_source_node)
-
- result = graphDb_data_Access.get_current_status_document_node(file_name)
- is_cancelled_status = result[0]['is_cancelled']
- if bool(is_cancelled_status) == True:
- logging.info(f'Is_cancelled True at the end extraction')
- job_status = 'Cancelled'
- logging.info(f'Job Status at the end : {job_status}')
- end_time = datetime.now()
- processed_time = end_time - start_time
- obj_source_node = sourceNode()
- obj_source_node.file_name = file_name
- obj_source_node.status = job_status
- obj_source_node.processing_time = processed_time
+ logging.info(f'Is_cancelled True at the end extraction')
+ job_status = 'Cancelled'
+ logging.info(f'Job Status at the end : {job_status}')
+ end_time = datetime.now()
+ processed_time = end_time - start_time
+ obj_source_node = sourceNode()
+ obj_source_node.file_name = file_name
+ obj_source_node.status = job_status
+ obj_source_node.processing_time = processed_time
- graphDb_data_Access.update_source_node(obj_source_node)
- logging.info('Updated the nodeCount and relCount properties in Document node')
- logging.info(f'file:{file_name} extraction has been completed')
+ graphDb_data_Access.update_source_node(obj_source_node)
+ logging.info('Updated the nodeCount and relCount properties in Document node')
+ logging.info(f'file:{file_name} extraction has been completed')
- # merged_file_path have value only when file uploaded from local
-
- if is_uploaded_from_local:
- gcs_file_cache = os.environ.get('GCS_FILE_CACHE')
- if gcs_file_cache == 'True':
- folder_name = create_gcs_bucket_folder_name_hashed(uri, file_name)
- delete_file_from_gcs(BUCKET_UPLOAD,folder_name,file_name)
- else:
- delete_uploaded_local_file(merged_file_path, file_name)
+ # merged_file_path have value only when file uploaded from local
- return {
- "fileName": file_name,
- "nodeCount": node_count,
- "relationshipCount": rel_count,
- "processingTime": round(processed_time.total_seconds(),2),
- "status" : job_status,
- "model" : model,
- "success_count" : 1
- }
+ if is_uploaded_from_local:
+ gcs_file_cache = os.environ.get('GCS_FILE_CACHE')
+ if gcs_file_cache == 'True':
+ folder_name = create_gcs_bucket_folder_name_hashed(uri, file_name)
+ delete_file_from_gcs(BUCKET_UPLOAD,folder_name,file_name)
+ else:
+ delete_uploaded_local_file(merged_file_path, file_name)
+
+ return {
+ "fileName": file_name,
+ "nodeCount": node_count,
+ "relationshipCount": rel_count,
+ "processingTime": round(processed_time.total_seconds(),2),
+ "status" : job_status,
+ "model" : model,
+ "success_count" : 1
+ }
+ else:
+ logging.info('File does not process because it\'s already in Processing status')
else:
- logging.info('File does not process because it\'s already in Processing status')
+ error_message = "Unable to get the status of docuemnt node."
+ logging.error(error_message)
+ raise Exception(error_message)
def processing_chunks(chunkId_chunkDoc_list,graph,uri, userName, password, database,file_name,model,allowedNodes,allowedRelationship, node_count, rel_count):
#create vector index and update chunk node with embedding
diff --git a/backend/src/shared/constants.py b/backend/src/shared/constants.py
index 903999a51..c5f8e98a4 100644
--- a/backend/src/shared/constants.py
+++ b/backend/src/shared/constants.py
@@ -276,4 +276,4 @@
RETURN text, avg_score as score, {{length:size(text), source: COALESCE( CASE WHEN d.url CONTAINS "None" THEN d.fileName ELSE d.url END, d.fileName), chunkdetails: chunkdetails}} AS metadata
"""
-YOUTUBE_CHUNK_SIZE_SECONDS = 60
\ No newline at end of file
+YOUTUBE_CHUNK_SIZE_SECONDS = 60
diff --git a/backend/test_integrationqa.py b/backend/test_integrationqa.py
index cd662cbdc..821cc6b5c 100644
--- a/backend/test_integrationqa.py
+++ b/backend/test_integrationqa.py
@@ -1,270 +1,243 @@
+import json
+import os
+import shutil
+import logging
+import pandas as pd
+from datetime import datetime as dt
+from dotenv import load_dotenv
+
from score import *
from src.main import *
-import logging
from src.QA_integration_new import QA_RAG
from langserve import add_routes
-import asyncio
-import os
-from dotenv import load_dotenv
-import pandas as pd
-from datetime import datetime as dt
-uri = ''
-userName = ''
-password = ''
-# model = 'openai-gpt-3.5'
-database = 'neo4j'
+# Load environment variables if needed
+load_dotenv()
+
+# Constants
+URI = ''
+USERNAME = ''
+PASSWORD = ''
+DATABASE = 'neo4j'
CHUNK_DIR = os.path.join(os.path.dirname(__file__), "chunks")
MERGED_DIR = os.path.join(os.path.dirname(__file__), "merged_files")
-graph = create_graph_database_connection(uri, userName, password, database)
-
-
-def test_graph_from_file_local_file(model_name):
- model = model_name
- file_name = 'About Amazon.pdf'
- # shutil.copyfile('data/Bank of America Q23.pdf', 'backend/src/merged_files/Bank of America Q23.pdf')
- shutil.copyfile('/workspaces/llm-graph-builder/backend/files/About Amazon.pdf',
- '/workspaces/llm-graph-builder/backend/merged_files/About Amazon.pdf')
- obj_source_node = sourceNode()
- obj_source_node.file_name = file_name
- obj_source_node.file_type = 'pdf'
- obj_source_node.file_size = '1087'
- obj_source_node.file_source = 'local file'
- obj_source_node.model = model
- obj_source_node.created_at = datetime.now()
- graphDb_data_Access = graphDBdataAccess(graph)
- graphDb_data_Access.create_source_node(obj_source_node)
- merged_file_path = os.path.join(MERGED_DIR, file_name)
- print(merged_file_path)
-
-
- local_file_result = extract_graph_from_file_local_file(uri, userName, password, database, model, merged_file_path, file_name, '', '')
- # final_list.append(local_file_result)
- print(local_file_result)
-
- logging.info("Info: ")
- try:
- assert local_file_result['status'] == 'Completed' and local_file_result['nodeCount'] > 0 and local_file_result[
- 'relationshipCount'] > 0
- return local_file_result
- print("Success")
- except AssertionError as e:
- print("Fail: ", e)
- return local_file_result
-
-
-def test_graph_from_file_local_file_failed(model_name):
- model = model_name
- file_name = 'Not_exist.pdf'
- try:
- obj_source_node = sourceNode()
- obj_source_node.file_name = file_name
- obj_source_node.file_type = 'pdf'
- obj_source_node.file_size = '0'
- obj_source_node.file_source = 'local file'
- obj_source_node.model = model
- obj_source_node.created_at = datetime.now()
- graphDb_data_Access = graphDBdataAccess(graph)
- graphDb_data_Access.create_source_node(obj_source_node)
-
- local_file_result = extract_graph_from_file_local_file(graph, model, file_name, merged_file_path, '', '')
-
- print(local_file_result)
- except AssertionError as e:
- print('Failed due to file does not exist means not uploaded or accidentaly deleteled from server')
- print("Failed: Error from extract function ", e)
-
-# Check for Wikipedia file to be test
-def test_graph_from_Wikipedia(model_name):
- model = model_name
- wiki_query = 'https://en.wikipedia.org/wiki/Ram_Mandir'
- source_type = 'Wikipedia'
- file_name = "Ram_Mandir"
- create_source_node_graph_url_wikipedia(graph, model, wiki_query, source_type)
- wikiresult = extract_graph_from_file_Wikipedia(uri, userName, password, database, model, file_name, 1, 'en', '', '')
- logging.info("Info: Wikipedia test done")
- print(wikiresult)
-
+
+# Initialize database connection
+graph = create_graph_database_connection(URI, USERNAME, PASSWORD, DATABASE)
+
+def create_source_node_local(graph, model, file_name):
+ """Creates a source node for a local file."""
+ source_node = sourceNode()
+ source_node.file_name = file_name
+ source_node.file_type = 'pdf'
+ source_node.file_size = '1087'
+ source_node.file_source = 'local file'
+ source_node.model = model
+ source_node.created_at = dt.now()
+ graphDB_data_Access = graphDBdataAccess(graph)
+ graphDB_data_Access.create_source_node(source_node)
+ return source_node
+
+def test_graph_from_file_local(model_name):
+ """Test graph creation from a local file."""
+ file_name = 'About Amazon.pdf'
+ shutil.copyfile('/workspaces/llm-graph-builder/backend/files/About Amazon.pdf',
+ os.path.join(MERGED_DIR, file_name))
+
+ create_source_node_local(graph, model_name, file_name)
+ merged_file_path = os.path.join(MERGED_DIR, file_name)
+
+ local_file_result = extract_graph_from_file_local_file(
+ URI, USERNAME, PASSWORD, DATABASE, model_name, merged_file_path, file_name, '', ''
+ )
+ logging.info("Local file processing complete")
+ print(local_file_result)
+
try:
- assert wikiresult['status'] == 'Completed' and wikiresult['nodeCount'] > 0 and wikiresult['relationshipCount'] > 0
- return wikiresult
+ assert local_file_result['status'] == 'Completed'
+ assert local_file_result['nodeCount'] > 0
+ assert local_file_result['relationshipCount'] > 0
print("Success")
except AssertionError as e:
- print("Fail ", e)
- return wikiresult
-
+ print("Fail: ", e)
-def test_graph_from_Wikipedia_failed():
- wiki_query = 'Test QA 123456'
- source_type = 'Wikipedia'
- try:
- logging.info("Created source node for wikipedia")
- create_source_node_graph_url_wikipedia(graph, model, wiki_query, source_type)
- except AssertionError as e:
- print("Fail ", e)
+ return local_file_result
-# Check for Youtube_video to be Success
-def test_graph_from_youtube_video(model_name):
- model = model_name
- source_url = 'https://www.youtube.com/watch?v=T-qy-zPWgqA'
- source_type = 'youtube'
+def test_graph_from_wikipedia(model_name):
+ """Test graph creation from a Wikipedia page."""
+ wiki_query = 'https://en.wikipedia.org/wiki/Ram_Mandir'
+ source_type = 'Wikipedia'
+ file_name = "Ram_Mandir"
+ create_source_node_graph_url_wikipedia(graph, model_name, wiki_query, source_type)
- create_source_node_graph_url_youtube(graph, model, source_url, source_type)
- youtuberesult = extract_graph_from_file_youtube(uri, userName, password, database, model, source_url, '', '')
+ wiki_result = extract_graph_from_file_Wikipedia(URI, USERNAME, PASSWORD, DATABASE, model_name, file_name, 1, 'en', '', '')
+ logging.info("Wikipedia test done")
+ print(wiki_result)
- logging.info("Info: Youtube Video test done")
- print(youtuberesult)
try:
- assert youtuberesult['status'] == 'Completed' and youtuberesult['nodeCount'] > 1 and youtuberesult[
- 'relationshipCount'] > 1
- return youtuberesult
+ assert wiki_result['status'] == 'Completed'
+ assert wiki_result['nodeCount'] > 0
+ assert wiki_result['relationshipCount'] > 0
print("Success")
except AssertionError as e:
- print("Failed ", e)
- return youtuberesult
-
-# Check for Youtube_video to be Failed
-
-def test_graph_from_youtube_video_failed():
- url = 'https://www.youtube.com/watch?v=U9mJuUkhUzk'
- source_type = 'youtube'
-
- create_source_node_graph_url_youtube(graph, model, url, source_type)
- youtuberesult = extract_graph_from_file_youtube(graph, model, url, ',', ',')
- # print(result)
- print(youtuberesult)
- try:
- assert youtuberesult['status'] == 'Completed'
- return youtuberesult
- except AssertionError as e:
- print("Failed ", e)
-
-
-# Check for the GCS file to be uploaded, process and completed
-
-def test_graph_from_file_test_gcs():
- bucket_name = 'test'
- folder_name = 'test'
- source_type = 'gcs test bucket'
- file_name = 'Neuralink brain chip patient playing chess.pdf'
- create_source_node_graph_url_gcs(graph, model, bucket_name, folder_name, source_type)
- gcsresult = extract_graph_from_file_gcs(graph, model, bucket_name, folder_name, file_name, '', '')
-
- logging.info("Info")
- print(gcsresult)
-
- try:
- assert gcsresult['status'] == 'Completed' and gcsresult['nodeCount'] > 10 and gcsresult['relationshipCount'] > 5
- print("Success")
- except AssertionError as e:
- print("Failed ", e)
-
-
-def test_graph_from_file_test_gcs_failed():
- bucket_name = 'llm_graph_test'
- folder_name = 'test'
- source_type = 'gcs bucket'
- # file_name = 'Neuralink brain chip patient playing chess.pdf'
- try:
- create_source_node_graph_url_gcs(graph, model, bucket_name, folder_name, source_type)
- print("GCS: Create source node failed due to bucket not exist")
- except AssertionError as e:
- print("Failed ", e)
-
-
-def test_graph_from_file_test_s3_failed():
- source_url = 's3://development-llm-test/'
- try:
- create_source_node_graph_url_s3(graph, model, source_url, 'test123', 'pwd123')
- # assert result['status'] == 'Failed'
- # print("S3 created source node failed die to wrong access key id and secret")
- except AssertionError as e:
- print("Failed ", e)
-
-
-# Check the Functionality of Chatbot QnA for mode 'graph+vector'
-def test_chatbot_QnA(model_name):
- model = model_name
- QA_n_RAG = QA_RAG(graph, model, 'Tell me about amazon', '[]', 1, 'graph+vector')
+ print("Fail: ", e)
+
+ return wiki_result
+
+def test_graph_website(model_name):
+ """Test graph creation from a Website page."""
+ #graph, model, source_url, source_type
+ source_url = 'https://www.amazon.com/'
+ source_type = 'web-url'
+ create_source_node_graph_web_url(graph, model_name, source_url, source_type)
+
+ weburl_result = extract_graph_from_web_page(URI, USERNAME, PASSWORD, DATABASE, model_name, source_url, '', '')
+ logging.info("WebUrl test done")
+ print(weburl_result)
- print(QA_n_RAG)
- print(len(QA_n_RAG['message']))
try:
- assert len(QA_n_RAG['message']) > 20
- return QA_n_RAG
+ assert weburl_result['status'] == 'Completed'
+ assert weburl_result['nodeCount'] > 0
+ assert weburl_result['relationshipCount'] > 0
print("Success")
except AssertionError as e:
- print("Failed ", e)
- return QA_n_RAG
+ print("Fail: ", e)
+ return weburl_result
-# Check the Functionality of Chatbot QnA for mode 'vector'
-def test_chatbot_QnA_vector(model_name):
- model = model_name
- QA_n_RAG_vector = QA_RAG(graph, model, 'Tell me about amazon', '[]', 1, 'vector')
+def test_graph_from_youtube_video(model_name):
+ """Test graph creation from a YouTube video."""
+ source_url = 'https://www.youtube.com/watch?v=T-qy-zPWgqA'
+ source_type = 'youtube'
+ create_source_node_graph_url_youtube(graph, model_name, source_url, source_type)
+ youtube_result = extract_graph_from_file_youtube(
+ URI, USERNAME, PASSWORD, DATABASE, model_name, source_url, '', ''
+ )
+ logging.info("YouTube Video test done")
+ print(youtube_result)
- print(QA_n_RAG_vector)
- print(len(QA_n_RAG_vector['message']))
try:
- assert len(QA_n_RAG_vector['message']) > 20
- return QA_n_RAG_vector
+ assert youtube_result['status'] == 'Completed'
+ assert youtube_result['nodeCount'] > 1
+ assert youtube_result['relationshipCount'] > 1
print("Success")
except AssertionError as e:
- print("Failed ", e)
- return QA_n_RAG_vector
-
-# Check the Functionality of Chatbot QnA for mode 'hybrid'
+ print("Failed: ", e)
-def test_chatbot_QnA_hybrid(model_name):
- model = model_name
- QA_n_RAG_hybrid = QA_RAG(graph, model, 'Tell me about amazon', '[]', 1, 'hybrid')
+ return youtube_result
+def test_chatbot_qna(model_name, mode='vector'):
+ """Test chatbot QnA functionality for different modes."""
+ QA_n_RAG = QA_RAG(graph, model_name, 'Tell me about amazon', '[]', 1, mode)
+ print(QA_n_RAG)
+ print(len(QA_n_RAG['message']))
- print(QA_n_RAG_hybrid)
- print(len(QA_n_RAG_hybrid['message']))
try:
- assert len(QA_n_RAG_hybrid['message']) > 20
- return QA_n_RAG_hybrid
+ assert len(QA_n_RAG['message']) > 20
+ return QA_n_RAG
print("Success")
except AssertionError as e:
print("Failed ", e)
- return QA_n_RAG_hybrid
-
-
+ return QA_n_RAG
+
+# Test: get the list of disconnected nodes
+def disconected_nodes():
+ #graph = create_graph_database_connection(uri, userName, password, database)
+ graphDb_data_Access = graphDBdataAccess(graph)
+ nodes_list, total_nodes = graphDb_data_Access.list_unconnected_nodes()
+ print(nodes_list[0]["e"]["elementId"])
+ status = "False"
+
+ if total_nodes['total']>0:
+ status = "True"
+ else:
+ status = "False"
+
+ return nodes_list[0]["e"]["elementId"], status
+
+# Test: delete the disconnected nodes list
+def delete_disconected_nodes(lst_element_id):
+ print(f'disconnect elementid list {lst_element_id}')
+ #graph = create_graph_database_connection(uri, userName, password, database)
+ graphDb_data_Access = graphDBdataAccess(graph)
+ result = graphDb_data_Access.delete_unconnected_nodes(json.dumps(lst_element_id))
+ print(f'delete disconnect api result {result}')
+ if not result:
+ return "True"
+ else:
+ return "False"
+
+# Test: get duplicate nodes
+def get_duplicate_nodes():
+ #graph = create_graph_database_connection(uri, userName, password, database)
+ graphDb_data_Access = graphDBdataAccess(graph)
+ nodes_list, total_nodes = graphDb_data_Access.get_duplicate_nodes_list()
+ if total_nodes['total']>0:
+ return "True"
+ else:
+ return "False"
+
+# Test: populate graph schema from text
+def test_populate_graph_schema_from_text(model):
+ result_schema = populate_graph_schema_from_text('When Amazon was founded in 1993 by creator Jeff Benzos, it was mostly an online bookstore. Initially Amazon’s growth was very slow, not turning a profit until over 7 years after its founding. This was thanks to the great momentum provided by the dot-com bubble.', model, True)
+ print(result_schema)
+ return result_schema
+
+# def compare_graph_results(results):
+# """
+# Compare graph results across different models.
+# Add custom logic here to compare graph data, nodes, and relationships.
+# """
+# # Placeholder logic for comparison
+# print("Comparing results...")
+# for i in range(len(results) - 1):
+# result_a = results[i]
+# result_b = results[i + 1]
+# if result_a == result_b:
+# print(f"Result {i} is identical to result {i+1}")
+# else:
+# print(f"Result {i} differs from result {i+1}")
+
+def run_tests():
+ final_list = []
+ error_list = []
+ models = ['openai-gpt-3.5', 'openai-gpt-4o']
+
+ for model_name in models:
+ try:
+ final_list.append(test_graph_from_file_local(model_name))
+ final_list.append(test_graph_from_wikipedia(model_name))
+ final_list.append(test_populate_graph_schema_from_text(model_name))
+ final_list.append(test_graph_website(model_name))
+ final_list.append(test_graph_from_youtube_video(model_name))
+ final_list.append(test_chatbot_qna(model_name))
+ final_list.append(test_chatbot_qna(model_name, mode='vector'))
+ final_list.append(test_chatbot_qna(model_name, mode='graph+vector+fulltext'))
+ except Exception as e:
+ error_list.append((model_name, str(e)))
+ # # Compare and log differences in graph results
+ # # compare_graph_results(final_list) # Pass the final_list to compare_graph_results
+ # test_populate_graph_schema_from_text('openai-gpt-4o')
+ dis_elementid, dis_status = disconected_nodes()
+ lst_element_id = [dis_elementid]
+ delt = delete_disconected_nodes(lst_element_id)
+ dup = get_duplicate_nodes()
+ # schema = test_populate_graph_schema_from_text(model)
+ # Save final results to CSV
+ df = pd.DataFrame(final_list)
+ print(df)
+ df['execution_date'] = dt.today().strftime('%Y-%m-%d')
+ df['disconnected_nodes']=dis_status
+ df['get_duplicate_nodes']=dup
+ df['delete_disconected_nodes']=delt
+ # df['test_populate_graph_schema_from_text'] = schema
+ df.to_csv(f"Integration_TestResult_{dt.now().strftime('%Y%m%d_%H%M%S')}.csv", index=False)
+
+ # Save error details to CSV
+ df_errors = pd.DataFrame(error_list, columns=['Model', 'Error'])
+ df_errors['execution_date'] = dt.today().strftime('%Y-%m-%d')
+ df_errors.to_csv(f"Error_details_{dt.now().strftime('%Y%m%d_%H%M%S')}.csv", index=False)
if __name__ == "__main__":
- final_list = []
- for model_name in ['openai-gpt-3.5','azure_ai_gpt_35','azure_ai_gpt_4o','anthropic_claude_3_5_sonnet','fireworks_llama_v3_70b','bedrock_claude_3_5_sonnet']:
-
- # local file
- response = test_graph_from_file_local_file(model_name)
- final_list.append(response)
-
- # # Wikipedia Test
- response = test_graph_from_Wikipedia(model_name)
- final_list.append(response)
-
- # # Youtube Test
- response= test_graph_from_youtube_video(model_name)
- final_list.append(response)
- # # print(final_list)
-
- # # test_graph_from_file_test_gcs(model_name) # GCS Test
-
- # #chatbot 'graph+vector'
- response = test_chatbot_QnA(model_name)
- final_list.append(response)
-
- # #chatbot 'vector'
- response = test_chatbot_QnA_vector(model_name)
- final_list.append(response)
-
- # #chatbot 'hybrid'
- response = test_chatbot_QnA_hybrid(model_name)
- final_list.append(response)
-
- # test_graph_from_file_test_s3_failed() # S3 Failed Test Case
- df = pd.DataFrame(final_list)
- df['execution_date']= datetime.today().strftime('%Y-%m-%d')
- df.to_csv(f"Integration_TestResult_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv", index=False)
\ No newline at end of file
+ run_tests()
\ No newline at end of file
diff --git a/example.env b/example.env
index b443fd18d..23bcc6e06 100644
--- a/example.env
+++ b/example.env
@@ -1,27 +1,27 @@
# Mandatory
-OPENAI_API_KEY = ""
-DIFFBOT_API_KEY = ""
+OPENAI_API_KEY=""
+DIFFBOT_API_KEY=""
# Optional Backend
-EMBEDDING_MODEL = "all-MiniLM-L6-v2"
-IS_EMBEDDING = "true"
-KNN_MIN_SCORE = "0.94"
+EMBEDDING_MODEL="all-MiniLM-L6-v2"
+IS_EMBEDDING="true"
+KNN_MIN_SCORE="0.94"
# Enable Gemini (default is False) | Can be False or True
-GEMINI_ENABLED = False
+GEMINI_ENABLED=False
# LLM_MODEL_CONFIG_ollama_llama3="llama3,http://host.docker.internal:11434"
# Enable Google Cloud logs (default is False) | Can be False or True
-GCP_LOG_METRICS_ENABLED = False
-NUMBER_OF_CHUNKS_TO_COMBINE = 6
-UPDATE_GRAPH_CHUNKS_PROCESSED = 20
-NEO4J_URI = "neo4j://database:7687"
-NEO4J_USERNAME = "neo4j"
-NEO4J_PASSWORD = "password"
-LANGCHAIN_API_KEY = ""
-LANGCHAIN_PROJECT = ""
-LANGCHAIN_TRACING_V2 = "true"
-LANGCHAIN_ENDPOINT = "https://api.smith.langchain.com"
-GCS_FILE_CACHE = False
+GCP_LOG_METRICS_ENABLED=False
+NUMBER_OF_CHUNKS_TO_COMBINE=6
+UPDATE_GRAPH_CHUNKS_PROCESSED=20
+NEO4J_URI="neo4j://database:7687"
+NEO4J_USERNAME="neo4j"
+NEO4J_PASSWORD="password"
+LANGCHAIN_API_KEY=""
+LANGCHAIN_PROJECT=""
+LANGCHAIN_TRACING_V2="true"
+LANGCHAIN_ENDPOINT="https://api.smith.langchain.com"
+GCS_FILE_CACHE=False
ENTITY_EMBEDDING=True
# Optional Frontend
@@ -30,9 +30,8 @@ VITE_BLOOM_URL="https://workspace-preview.neo4j.io/workspace/explore?connectURL=
VITE_REACT_APP_SOURCES="local,youtube,wiki,s3,web"
VITE_LLM_MODELS="diffbot,openai-gpt-3.5,openai-gpt-4o" # ",ollama_llama3"
VITE_ENV="DEV"
-VITE_TIME_PER_CHUNK=4
VITE_TIME_PER_PAGE=50
VITE_CHUNK_SIZE=5242880
VITE_GOOGLE_CLIENT_ID=""
VITE_CHAT_MODES=""
-VITE_BATCH_SIZE=2
\ No newline at end of file
+VITE_BATCH_SIZE=2
diff --git a/frontend/Dockerfile b/frontend/Dockerfile
index 7a31d5bcf..c3a7c1c82 100644
--- a/frontend/Dockerfile
+++ b/frontend/Dockerfile
@@ -6,7 +6,6 @@ ARG VITE_REACT_APP_SOURCES=""
ARG VITE_LLM_MODELS=""
ARG VITE_GOOGLE_CLIENT_ID=""
ARG VITE_BLOOM_URL="https://workspace-preview.neo4j.io/workspace/explore?connectURL={CONNECT_URL}&search=Show+me+a+graph&featureGenAISuggestions=true&featureGenAISuggestionsInternal=true"
-ARG VITE_TIME_PER_CHUNK=4
ARG VITE_TIME_PER_PAGE=50
ARG VITE_LARGE_FILE_SIZE=5242880
ARG VITE_CHUNK_SIZE=5242880
@@ -23,8 +22,8 @@ RUN VITE_BACKEND_API_URL=$VITE_BACKEND_API_URL \
VITE_LLM_MODELS=$VITE_LLM_MODELS \
VITE_GOOGLE_CLIENT_ID=$VITE_GOOGLE_CLIENT_ID \
VITE_BLOOM_URL=$VITE_BLOOM_URL \
- VITE_TIME_PER_CHUNK=$VITE_TIME_PER_CHUNK \
VITE_CHUNK_SIZE=$VITE_CHUNK_SIZE \
+ VITE_TIME_PER_PAGE=$VITE_TIME_PER_PAGE \
VITE_ENV=$VITE_ENV \
VITE_LARGE_FILE_SIZE=${VITE_LARGE_FILE_SIZE} \
VITE_CHAT_MODES=$VITE_CHAT_MODES \
diff --git a/frontend/example.env b/frontend/example.env
index 05b8cdf60..63bd3e7c3 100644
--- a/frontend/example.env
+++ b/frontend/example.env
@@ -3,7 +3,6 @@ VITE_BLOOM_URL="https://workspace-preview.neo4j.io/workspace/explore?connectURL=
VITE_REACT_APP_SOURCES="local,youtube,wiki,s3,web"
VITE_LLM_MODELS="diffbot,openai-gpt-3.5,openai-gpt-4o"
VITE_ENV="DEV"
-VITE_TIME_PER_CHUNK=4
VITE_TIME_PER_PAGE=50
VITE_CHUNK_SIZE=5242880
VITE_LARGE_FILE_SIZE=5242880
diff --git a/frontend/src/API/Index.ts b/frontend/src/API/Index.ts
new file mode 100644
index 000000000..f4ad15cbe
--- /dev/null
+++ b/frontend/src/API/Index.ts
@@ -0,0 +1,7 @@
+import axios from 'axios';
+import { url } from '../utils/Utils';
+
+const api = axios.create({
+ baseURL: url(),
+});
+export default api;
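The new `frontend/src/API/Index.ts` centralises the axios client that the long list of touched service files now imports. A minimal sketch of a service written against the shared instance — the `/health` endpoint name and logging below are illustrative assumptions, not the repository's exact code:

```typescript
import api from '../API/Index';

// Hypothetical service sketch: requests go through the shared axios instance,
// so the backend base URL is configured once in src/API/Index.ts.
export const getHealthStatus = async () => {
  try {
    // '/health' is an assumed endpoint name, used here only for illustration.
    const response = await api.get('/health');
    return response;
  } catch (error) {
    console.log('Error fetching backend health status', error);
    throw error;
  }
};
```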
diff --git a/frontend/src/App.css b/frontend/src/App.css
index ff084ffae..fe285c972 100644
--- a/frontend/src/App.css
+++ b/frontend/src/App.css
@@ -233,10 +233,8 @@
letter-spacing: 0;
line-height: 1.25rem;
width: max-content;
- height: 30px;
text-overflow: ellipsis;
white-space: nowrap;
- overflow: hidden;
}
.ndl-widget-content>div {
@@ -365,4 +363,13 @@
.widthunset{
width: initial !important;
height: initial !important;
+}
+
+.text-input-container {
+ transition: width 1.5s ease;
+ /* width: 100dvh; */
+}
+
+.text-input-container.search-initiated {
+ width: 60dvh;
}
\ No newline at end of file
diff --git a/frontend/src/components/ChatBot/ChatInfoModal.tsx b/frontend/src/components/ChatBot/ChatInfoModal.tsx
index 7bd837299..89c15ea72 100644
--- a/frontend/src/components/ChatBot/ChatInfoModal.tsx
+++ b/frontend/src/components/ChatBot/ChatInfoModal.tsx
@@ -18,13 +18,20 @@ import wikipedialogo from '../../assets/images/wikipedia.svg';
import youtubelogo from '../../assets/images/youtube.svg';
import gcslogo from '../../assets/images/gcs.webp';
import s3logo from '../../assets/images/s3logo.png';
-import { Chunk, Entity, ExtendedNode, GroupedEntity, UserCredentials, chatInfoMessage } from '../../types';
+import {
+ Chunk,
+ Entity,
+ ExtendedNode,
+ ExtendedRelationship,
+ GroupedEntity,
+ UserCredentials,
+ chatInfoMessage,
+} from '../../types';
import { useContext, useEffect, useMemo, useState } from 'react';
import HoverableLink from '../UI/HoverableLink';
import GraphViewButton from '../Graph/GraphViewButton';
import { chunkEntitiesAPI } from '../../services/ChunkEntitiesInfo';
import { useCredentials } from '../../context/UserCredentials';
-import type { Relationship } from '@neo4j-nvl/base';
import { calcWordColor } from '@neo4j-devtools/word-color';
import ReactMarkdown from 'react-markdown';
import { GlobeAltIconOutline } from '@neo4j-ndl/react/icons';
@@ -51,7 +58,7 @@ const ChatInfoModal: React.FC = ({
const [loading, setLoading] = useState(false);
const { userCredentials } = useCredentials();
const [nodes, setNodes] = useState<ExtendedNode[]>([]);
- const [relationships, setRelationships] = useState<Relationship[]>([]);
+ const [relationships, setRelationships] = useState<ExtendedRelationship[]>([]);
const [chunks, setChunks] = useState([]);
const themeUtils = useContext(ThemeWrapperContext);
const [, copy] = useCopyToClipboard();
@@ -168,7 +175,11 @@ const ChatInfoModal: React.FC = ({
) : (
{mode != 'graph' ? Sources used : <>>}
- {mode === 'graph+vector' || mode === 'graph' ? Top Entities used : <>>}
+ {mode === 'graph+vector' || mode === 'graph' || mode === 'graph+vector+fulltext' ? (
+ Top Entities used
+ ) : (
+ <>>
+ )}
{mode === 'graph' && cypher_query?.trim().length ? (
Generated Cypher Query
) : (
diff --git a/frontend/src/components/ChatBot/ChatModeToggle.tsx b/frontend/src/components/ChatBot/ChatModeToggle.tsx
index 4c0d54bc7..e82dfea4d 100644
--- a/frontend/src/components/ChatBot/ChatModeToggle.tsx
+++ b/frontend/src/components/ChatBot/ChatModeToggle.tsx
@@ -31,7 +31,12 @@ export default function ChatModeToggle({
() =>
chatModes?.map((m) => {
return {
- title: capitalize(m),
+ title: m.includes('+')
+ ? m
+ .split('+')
+ .map((s) => capitalize(s))
+ .join('+')
+ : capitalize(m),
onClick: () => {
setchatMode(m);
},
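The new title mapping keeps composite chat-mode names readable in the toggle menu. A self-contained sketch of the same formatting rule, with `capitalize` re-implemented locally for illustration:

```typescript
// Re-implementation of the mode-title formatting added to ChatModeToggle:
// composite modes keep their '+' separators, and each segment is capitalized.
const capitalize = (word: string): string => word.charAt(0).toUpperCase() + word.slice(1);

const formatModeTitle = (mode: string): string =>
  mode.includes('+') ? mode.split('+').map(capitalize).join('+') : capitalize(mode);

console.log(formatModeTitle('vector'));                // "Vector"
console.log(formatModeTitle('graph+vector+fulltext')); // "Graph+Vector+Fulltext"
```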
diff --git a/frontend/src/components/ChatBot/Info/InfoModal.tsx b/frontend/src/components/ChatBot/Info/InfoModal.tsx
index 0dd325731..cf1bbca47 100644
--- a/frontend/src/components/ChatBot/Info/InfoModal.tsx
+++ b/frontend/src/components/ChatBot/Info/InfoModal.tsx
@@ -6,13 +6,20 @@ import wikipedialogo from '../../../assets/images/Wikipedia-logo-v2.svg';
import youtubelogo from '../../../assets/images/youtube.png';
import gcslogo from '../../../assets/images/gcs.webp';
import s3logo from '../../../assets/images/s3logo.png';
-import { Chunk, Entity, ExtendedNode, GroupedEntity, UserCredentials, chatInfoMessage } from '../../../types';
+import {
+ Chunk,
+ Entity,
+ ExtendedNode,
+ ExtendedRelationship,
+ GroupedEntity,
+ UserCredentials,
+ chatInfoMessage,
+} from '../../../types';
import { useEffect, useMemo, useState } from 'react';
import HoverableLink from '../../UI/HoverableLink';
import GraphViewButton from '../../Graph/GraphViewButton';
import { chunkEntitiesAPI } from '../../../services/ChunkEntitiesInfo';
import { useCredentials } from '../../../context/UserCredentials';
-import type { Relationship } from '@neo4j-nvl/base';
import { calcWordColor } from '@neo4j-devtools/word-color';
import ReactMarkdown from 'react-markdown';
import { GlobeAltIconOutline } from '@neo4j-ndl/react/icons';
@@ -23,7 +30,7 @@ const InfoModal: React.FC = ({ sources, model, total_tokens, re
const [loading, setLoading] = useState(false);
const { userCredentials } = useCredentials();
const [nodes, setNodes] = useState<ExtendedNode[]>([]);
- const [relationships, setRelationships] = useState<Relationship[]>([]);
+ const [relationships, setRelationships] = useState<ExtendedRelationship[]>([]);
const [chunks, setChunks] = useState([]);
const parseEntity = (entity: Entity) => {
const { labels, properties } = entity;
diff --git a/frontend/src/components/Content.tsx b/frontend/src/components/Content.tsx
index ec4fad9bb..4bde5a9b4 100644
--- a/frontend/src/components/Content.tsx
+++ b/frontend/src/components/Content.tsx
@@ -31,6 +31,8 @@ import FallBackDialog from './UI/FallBackDialog';
import DeletePopUp from './Popups/DeletePopUp/DeletePopUp';
import GraphEnhancementDialog from './Popups/GraphEnhancementDialog';
import { tokens } from '@neo4j-ndl/base';
+import axios from 'axios';
+
const ConnectionModal = lazy(() => import('./Popups/ConnectionModal/ConnectionModal'));
const ConfirmationDialog = lazy(() => import('./Popups/LargeFilePopUp/ConfirmationDialog'));
let afterFirstRender = false;
@@ -180,6 +182,7 @@ const Content: React.FC = ({
};
const extractHandler = async (fileItem: CustomFile, uid: string) => {
+ queue.remove(fileItem.name as string);
try {
setFilesData((prevfiles) =>
prevfiles.map((curfile) => {
@@ -252,28 +255,45 @@ const Content: React.FC = ({
});
}
} catch (err: any) {
- const error = JSON.parse(err.message);
- if (Object.keys(error).includes('fileName')) {
- const { message } = error;
- const { fileName } = error;
- const errorMessage = error.message;
- setalertDetails({
- showAlert: true,
- alertType: 'error',
- alertMessage: message,
- });
- setFilesData((prevfiles) =>
- prevfiles.map((curfile) => {
- if (curfile.name == fileName) {
- return {
- ...curfile,
- status: 'Failed',
- errorMessage,
- };
- }
- return curfile;
- })
- );
+ if (err instanceof Error) {
+ try {
+ const error = JSON.parse(err.message);
+ if (Object.keys(error).includes('fileName')) {
+ setProcessedCount((prev) => {
+ if (prev == batchSize) {
+ return batchSize - 1;
+ }
+ return prev + 1;
+ });
+ const { message, fileName } = error;
+ queue.remove(fileName);
+ const errorMessage = error.message;
+ setalertDetails({
+ showAlert: true,
+ alertType: 'error',
+ alertMessage: message,
+ });
+ setFilesData((prevfiles) =>
+ prevfiles.map((curfile) => {
+ if (curfile.name == fileName) {
+ return { ...curfile, status: 'Failed', errorMessage };
+ }
+ return curfile;
+ })
+ );
+ } else {
+ console.error('Unexpected error format:', error);
+ }
+ } catch (parseError) {
+ if (axios.isAxiosError(err)) {
+ const axiosErrorMessage = err.response?.data?.message || err.message;
+ console.error('Axios error occurred:', axiosErrorMessage);
+ } else {
+ console.error('An unexpected error occurred:', err.message);
+ }
+ }
+ } else {
+ console.error('An unknown error occurred:', err);
}
}
};
@@ -302,7 +322,10 @@ const Content: React.FC = ({
return data;
};
- const addFilesToQueue = (remainingFiles: CustomFile[]) => {
+ const addFilesToQueue = async (remainingFiles: CustomFile[]) => {
+ if (!remainingFiles.length) {
+ await postProcessing(userCredentials as UserCredentials, postProcessingTasks);
+ }
remainingFiles.forEach((f) => {
setFilesData((prev) =>
prev.map((pf) => {
@@ -379,13 +402,11 @@ const Content: React.FC = ({
}
Promise.allSettled(data).then(async (_) => {
setextractLoading(false);
- await postProcessing(userCredentials as UserCredentials, postProcessingTasks);
});
- } else if (queueFiles && !queue.isEmpty()) {
+ } else if (queueFiles && !queue.isEmpty() && processingFilesCount < batchSize) {
data = scheduleBatchWiseProcess(queue.items, true);
Promise.allSettled(data).then(async (_) => {
setextractLoading(false);
- await postProcessing(userCredentials as UserCredentials, postProcessingTasks);
});
} else {
addFilesToQueue(filesTobeProcessed as CustomFile[]);
@@ -405,7 +426,6 @@ const Content: React.FC = ({
}
Promise.allSettled(data).then(async (_) => {
setextractLoading(false);
- await postProcessing(userCredentials as UserCredentials, postProcessingTasks);
});
} else {
const selectedNewFiles = childRef.current?.getSelectedRows().filter((f) => f.status === 'New');
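The reworked catch block in extractHandler narrows an unknown error in stages: a JSON payload embedded in an Error message first, then an axios error, then a generic fallback. A condensed sketch of that flow — the `{ fileName, message }` payload shape is assumed from the surrounding code:

```typescript
import axios from 'axios';

// Condensed sketch of the error narrowing used in extractHandler. Assumes the
// backend reports per-file failures as an Error whose message is a JSON string
// containing { fileName, message }.
function describeExtractError(err: unknown): string {
  if (!(err instanceof Error)) {
    return 'An unknown error occurred';
  }
  try {
    const parsed = JSON.parse(err.message);
    return 'fileName' in parsed
      ? `File ${parsed.fileName} failed: ${parsed.message}`
      : `Unexpected error format: ${err.message}`;
  } catch {
    // The message was not JSON; fall back to axios-specific details if present.
    return axios.isAxiosError(err) ? err.response?.data?.message ?? err.message : err.message;
  }
}
```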
diff --git a/frontend/src/components/Dropdown.tsx b/frontend/src/components/Dropdown.tsx
index f046bb8ff..ba949aec0 100644
--- a/frontend/src/components/Dropdown.tsx
+++ b/frontend/src/components/Dropdown.tsx
@@ -1,6 +1,6 @@
import { Dropdown, Tip } from '@neo4j-ndl/react';
import { OptionType, ReusableDropdownProps } from '../types';
-import { useMemo } from 'react';
+import { useMemo, useReducer } from 'react';
import { capitalize } from '../utils/Utils';
const DropdownComponent: React.FC = ({
@@ -13,6 +13,7 @@ const DropdownComponent: React.FC = ({
isDisabled,
value,
}) => {
+ const [disableTooltip, toggleDisableState] = useReducer((state) => !state, false);
const handleChange = (selectedOption: OptionType | null | void) => {
onSelect(selectedOption);
};
@@ -20,7 +21,7 @@ const DropdownComponent: React.FC = ({
return (
<>